From 5bf2105ee83aeac96ad35f77431d504d8cd71374 Mon Sep 17 00:00:00 2001 From: timkaler Date: Thu, 7 Nov 2019 16:47:19 -0500 Subject: [PATCH] Selective cachereads (#21) * Enable cache reads by default, which is needed for correctness. * Selectively omit caching for reads whose value does not change after the load instruction. Loads that are modified after the load instruction are called "uncacheable" in the code. * Propagate the uncacheable status of pointer arguments to calls. * readwriteread C test illustrates behavior of code. --- enzyme/Enzyme/Enzyme.cpp | 3 +- enzyme/Enzyme/EnzymeLogic.cpp | 447 ++++++++++++++++-- enzyme/Enzyme/EnzymeLogic.h | 2 +- enzyme/Enzyme/FunctionUtils.cpp | 32 +- enzyme/Enzyme/GradientUtils.cpp | 13 +- enzyme/Enzyme/GradientUtils.h | 5 +- enzyme/functional_tests_c/Makefile | 4 +- enzyme/functional_tests_c/insertsort_sum.c | 39 +- .../functional_tests_c/insertsort_sum_alt.c | 4 +- enzyme/functional_tests_c/readwriteread.c | 46 ++ enzyme/functional_tests_c/setup.sh | 4 +- .../testfiles/readwriteread-enzyme0.test | 6 + .../testfiles/readwriteread-enzyme1.test | 6 + .../testfiles/readwriteread-enzyme2.test | 6 + .../testfiles/readwriteread-enzyme3.test | 6 + enzyme/test/Enzyme/badcall.ll | 30 +- enzyme/test/Enzyme/badcall2.ll | 25 +- enzyme/test/Enzyme/badcall3.ll | 27 +- enzyme/test/Enzyme/badcall4.ll | 17 +- enzyme/test/Enzyme/badcallused.ll | 44 +- enzyme/test/Enzyme/badcallused2.ll | 42 +- 21 files changed, 639 insertions(+), 169 deletions(-) create mode 100644 enzyme/functional_tests_c/readwriteread.c create mode 100644 enzyme/functional_tests_c/testfiles/readwriteread-enzyme0.test create mode 100644 enzyme/functional_tests_c/testfiles/readwriteread-enzyme1.test create mode 100644 enzyme/functional_tests_c/testfiles/readwriteread-enzyme2.test create mode 100644 enzyme/functional_tests_c/testfiles/readwriteread-enzyme3.test diff --git a/enzyme/Enzyme/Enzyme.cpp b/enzyme/Enzyme/Enzyme.cpp index 1bd358db06aa9..19fdeb5fb37cb 100644 --- a/enzyme/Enzyme/Enzyme.cpp +++ b/enzyme/Enzyme/Enzyme.cpp @@ -155,7 +155,8 @@ void HandleAutoDiff(CallInst *CI, TargetLibraryInfo &TLI, AAResults &AA) {//, Lo bool differentialReturn = cast(fn)->getReturnType()->isFPOrFPVectorTy(); - auto newFunc = CreatePrimalAndGradient(cast(fn), constants, TLI, AA, /*should return*/false, differentialReturn, /*topLevel*/true, /*addedType*/nullptr);//, LI, DT); + std::set volatile_args; + auto newFunc = CreatePrimalAndGradient(cast(fn), constants, TLI, AA, /*should return*/false, differentialReturn, /*topLevel*/true, /*addedType*/nullptr, volatile_args);//, LI, DT); if (differentialReturn) args.push_back(ConstantFP::get(cast(fn)->getReturnType(), 1.0)); diff --git a/enzyme/Enzyme/EnzymeLogic.cpp b/enzyme/Enzyme/EnzymeLogic.cpp index 62af08b003b26..e069915897924 100644 --- a/enzyme/Enzyme/EnzymeLogic.cpp +++ b/enzyme/Enzyme/EnzymeLogic.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -46,15 +47,320 @@ using namespace llvm; llvm::cl::opt enzyme_print("enzyme_print", cl::init(false), cl::Hidden, cl::desc("Print before and after fns for autodiff")); -cl::opt cachereads( - "enzyme_cachereads", cl::init(false), cl::Hidden, - cl::desc("Force caching of all reads")); +cl::opt cache_reads_always( + "enzyme_always_cache_reads", cl::init(false), cl::Hidden, + cl::desc("Force always caching of all reads")); + +cl::opt cache_reads_never( + "enzyme_never_cache_reads", cl::init(false), cl::Hidden, + cl::desc("Force never caching of all reads")); + + + +// Computes a map of LoadInst -> boolean for a function indicating whether that load is "uncacheable". +// A load is considered "uncacheable" if the data at the loaded memory location can be modified after +// the load instruction. +std::map compute_uncacheable_load_map(GradientUtils* gutils, AAResults& AA, TargetLibraryInfo& TLI, + const std::set uncacheable_args) { + std::map can_modref_map; + for(BasicBlock* BB: gutils->originalBlocks) { + for (auto I = BB->begin(), E = BB->end(); I != E; I++) { + Instruction* inst = &*I; + // For each load instruction, determine if it is uncacheable. + if (auto op = dyn_cast(inst)) { + // NOTE(TFK): The reasoning behind skipping ConstantValues and ConstantInstructions needs to be fleshed out. + //if (gutils->isConstantValue(inst) || gutils->isConstantInstruction(inst)) { + // continue; + //} + + bool can_modref = false; + // Find the underlying object for the pointer operand of the load instruction. + auto obj = GetUnderlyingObject(op->getPointerOperand(), BB->getModule()->getDataLayout(), 100); + // If the pointer operand is from an argument to the function, we need to check if the argument + // received from the caller is uncacheable. + if (auto arg = dyn_cast(obj)) { + if (uncacheable_args.find(arg->getArgNo()) != uncacheable_args.end()) { + can_modref = true; + } + } else { + // NOTE(TFK): In the case where the underlying object for the pointer operand is from a Load or Call we need + // to check if we need to cache. Likely, we need to play it safe in this case and cache. + // NOTE(TFK): The logic below is an attempt at a conservative handling of the case mentioned above, but it + // needs to be verified. + + // Pointer operands originating from call instructions that are not malloc/free are conservatively considered uncacheable. + if (auto obj_op = dyn_cast(obj)) { + Function* called = obj_op->getCalledFunction(); + if (auto castinst = dyn_cast(obj_op->getCalledValue())) { + if (castinst->isCast()) { + if (auto fn = dyn_cast(castinst->getOperand(0))) { + if (isAllocationFunction(*fn, TLI) || isDeallocationFunction(*fn, TLI)) { + called = fn; + } + } + } + } + if (isCertainMallocOrFree(called)) { + //llvm::errs() << "OP is certain malloc or free: " << *op << "\n"; + } else { + //llvm::errs() << "OP is a non malloc/free call so we need to cache " << *op << "\n"; + can_modref = true; + } + } else if (isa(obj)) { + // If obj is from a load instruction conservatively consider it uncacheable. + can_modref = true; + } else { + // In absence of more information, assume that the underlying object for pointer operand is uncacheable in caller. + can_modref = true; + } + } + + for (BasicBlock* BB2 : gutils->originalBlocks) { + for (auto I2 = BB2->begin(), E2 = BB2->end(); I2 != E2; I2++) { + Instruction* inst2 = &*I2; + if (inst == inst2) continue; + if (!gutils->DT.dominates(inst2, inst)) { + if (llvm::isModSet(AA.getModRefInfo(inst2, MemoryLocation::get(op)))) { + can_modref = true; + //llvm::errs() << *inst << " needs to be cached due to: " << *inst2 << "\n"; + break; + } + } + } + } + can_modref_map[inst] = can_modref; + } + } + } + return can_modref_map; +} + +std::set compute_uncacheable_args_for_one_callsite(Instruction* callsite_inst, DominatorTree &DT, + TargetLibraryInfo &TLI, AAResults& AA, GradientUtils* gutils, const std::set parent_uncacheable_args) { + CallInst* callsite_op = dyn_cast(callsite_inst); + assert(callsite_op != nullptr); + + std::set uncacheable_args; + std::vector args; + std::vector args_safe; + + // First, we need to propagate the uncacheable status from the parent function to the callee. + // because memory location x modified after parent returns => x modified after callee returns. + for (unsigned i = 0; i < callsite_op->getNumArgOperands(); i++) { + args.push_back(callsite_op->getArgOperand(i)); + bool init_safe = true; + + // If the UnderlyingObject is from one of this function's arguments, then we need to propagate the volatility. + Value* obj = GetUnderlyingObject(callsite_op->getArgOperand(i), + callsite_inst->getParent()->getModule()->getDataLayout(), + 100); + // If underlying object is an Argument, check parent volatility status. + if (auto arg = dyn_cast(obj)) { + if (parent_uncacheable_args.find(arg->getArgNo()) != parent_uncacheable_args.end()) { + init_safe = false; + } + } else { + // Pointer operands originating from call instructions that are not malloc/free are conservatively considered uncacheable. + if (auto obj_op = dyn_cast(obj)) { + Function* called = obj_op->getCalledFunction(); + if (auto castinst = dyn_cast(obj_op->getCalledValue())) { + if (castinst->isCast()) { + if (auto fn = dyn_cast(castinst->getOperand(0))) { + if (isAllocationFunction(*fn, TLI) || isDeallocationFunction(*fn, TLI)) { + called = fn; + } + } + } + } + if (isCertainMallocOrFree(called)) { + //llvm::errs() << "OP is certain malloc or free: " << *op << "\n"; + } else { + //llvm::errs() << "OP is a non malloc/free call so we need to cache " << *op << "\n"; + init_safe = false; + } + } else if (isa(obj)) { + // If obj is from a load instruction conservatively consider it uncacheable. + init_safe = false; + } else { + // In absence of more information, assume that the underlying object for pointer operand is uncacheable in caller. + init_safe = false; + } + } + // TODO(TFK): Also need to check whether underlying object is traced to load / non-allocating-call instruction. + args_safe.push_back(init_safe); + } + + // Second, we check for memory modifications that can occur in the continuation of the + // callee inside the parent function. + for(BasicBlock* BB: gutils->originalBlocks) { + for (auto I = BB->begin(), E = BB->end(); I != E; I++) { + Instruction* inst = &*I; + + // If the "inst" does not dominate "callsite_inst" then we cannot prove that + // "inst" happens before "callsite_inst". If "inst" modifies an argument of the call, + // then that call needs to consider the argument uncacheable. + // To correctly handle case where inst == callsite_inst, we need to look at next instruction after callsite_inst. + if (!gutils->DT.dominates(inst, callsite_inst->getNextNonDebugInstruction())) { + //llvm::errs() << "Instruction " << *inst << " DOES NOT dominates " << *callsite_inst << "\n"; + // Consider Store Instructions. + if (auto op = dyn_cast(inst)) { + for (unsigned i = 0; i < args.size(); i++) { + // If the modification flag is set, then this instruction may modify the $i$th argument of the call. + if (!llvm::isModSet(AA.getModRefInfo(op, MemoryLocation::getForArgument(callsite_op, i, TLI)))) { + //llvm::errs() << "Instruction " << *op << " is NoModRef with call argument " << *args[i] << "\n"; + } else { + //llvm::errs() << "Instruction " << *op << " is maybe ModRef with call argument " << *args[i] << "\n"; + args_safe[i] = false; + } + } + } + + // Consider Call Instructions. + if (auto op = dyn_cast(inst)) { + //llvm::errs() << "OP is call inst: " << *op << "\n"; + // Ignore memory allocation functions. + Function* called = op->getCalledFunction(); + if (auto castinst = dyn_cast(op->getCalledValue())) { + if (castinst->isCast()) { + if (auto fn = dyn_cast(castinst->getOperand(0))) { + if (isAllocationFunction(*fn, TLI) || isDeallocationFunction(*fn, TLI)) { + called = fn; + } + } + } + } + if (isCertainMallocOrFree(called)) { + //llvm::errs() << "OP is certain malloc or free: " << *op << "\n"; + continue; + } + + // For all the arguments, perform same check as for Stores, but ignore non-pointer arguments. + for (unsigned i = 0; i < args.size(); i++) { + if (!args[i]->getType()->isPointerTy()) continue; // Ignore non-pointer arguments. + if (!llvm::isModSet(AA.getModRefInfo(op, MemoryLocation::getForArgument(callsite_op, i, TLI)))) { + //llvm::errs() << "Instruction " << *op << " is NoModRef with call argument " << *args[i] << "\n"; + } else { + //llvm::errs() << "Instruction " << *op << " is maybe ModRef with call argument " << *args[i] << "\n"; + args_safe[i] = false; + } + } + } + } else { + //llvm::errs() << "Instruction " << *inst << " DOES dominates " << *callsite_inst << "\n"; + } + } + } + + //llvm::errs() << "CallInst: " << *callsite_op<< "CALL ARGUMENT INFO: \n"; + for (unsigned i = 0; i < args.size(); i++) { + if (!args_safe[i]) { + uncacheable_args.insert(i); + } + //llvm::errs() << "Arg: " << *args[i] << " STATUS: " << args_safe[i] << "\n"; + } + return uncacheable_args; +} + +// Given a function and the arguments passed to it by its caller that are uncacheable (_uncacheable_args) compute +// the set of uncacheable arguments for each callsite inside the function. A pointer argument is uncacheable at +// a callsite if the memory pointed to might be modified after that callsite. +std::map > compute_uncacheable_args_for_callsites( + Function* F, DominatorTree &DT, TargetLibraryInfo &TLI, AAResults& AA, GradientUtils* gutils, + const std::set uncacheable_args) { + std::map > uncacheable_args_map; + for(BasicBlock* BB: gutils->originalBlocks) { + for (auto I = BB->begin(), E = BB->end(); I != E; I++) { + Instruction* inst = &*I; + if (auto op = dyn_cast(inst)) { + + // We do not need uncacheable args for intrinsic functions. So skip such callsites. + if(isa(inst)) { + continue; + } + + // We do not need uncacheable args for memory allocation functions. So skip such callsites. + Function* called = op->getCalledFunction(); + if (auto castinst = dyn_cast(op->getCalledValue())) { + if (castinst->isCast()) { + if (auto fn = dyn_cast(castinst->getOperand(0))) { + if (isAllocationFunction(*fn, TLI) || isDeallocationFunction(*fn, TLI)) { + called = fn; + } + } + } + } + if (isCertainMallocOrFree(called)) { + continue; + } + + // For all other calls, we compute the uncacheable args for this callsite. + uncacheable_args_map[op] = compute_uncacheable_args_for_one_callsite(inst, + DT, TLI, AA, gutils, uncacheable_args); + } + } + } + return uncacheable_args_map; +} + +// Determine if a load is needed in the reverse pass. We only use this logic in the top level function right now. +bool is_load_needed_in_reverse(GradientUtils* gutils, AAResults& AA, Instruction* inst) { + + std::vector uses_list; + std::set uses_set; + uses_list.push_back(inst); + uses_set.insert(inst); + + while (true) { + bool new_user_added = false; + for (unsigned i = 0; i < uses_list.size(); i++) { + for (auto use = uses_list[i]->user_begin(), end = uses_list[i]->user_end(); use != end; ++use) { + Value* v = (*use); + //llvm::errs() << "Use list: " << *v << "\n"; + if (uses_set.find(v) == uses_set.end()) { + uses_set.insert(v); + uses_list.push_back(v); + new_user_added = true; + } + } + } + if (!new_user_added) break; + } + //llvm::errs() << "Analysis for load " << *inst << " which has nuses: " << inst->getNumUses() << "\n"; + for (unsigned i = 0; i < uses_list.size(); i++) { + //llvm::errs() << "Considering use " << *uses_list[i] << "\n"; + if (uses_list[i] == dyn_cast(inst)) continue; + + if (isa(uses_list[i]) || isa(uses_list[i]) || isa(uses_list[i]) || isa(uses_list[i]) || isa(uses_list[i]) || isa(uses_list[i]) || + isa(uses_list[i]) /*|| isa(uses_list[i])*/){ + continue; + } + + if (auto op = dyn_cast(uses_list[i])) { + if (op->getOpcode() == Instruction::FAdd || op->getOpcode() == Instruction::FSub) { + continue; + } else { + //llvm::errs() << "Need value of " << *inst << "\n" << "\t Due to " << *op << "\n"; + return true; + } + } + + //if (auto op = dyn_cast(uses_list[i])) { + // llvm::errs() << "Need value of " << *inst << "\n" << "\t Due to " << *op << "\n"; + // return true; + //} + + //llvm::errs() << "Need value of " << *inst << "\n" << "\t Due to " << *uses_list[i] << "\n"; + //return true; + } + return false; +} + //! return structtype if recursive function -std::pair CreateAugmentedPrimal(Function* todiff, AAResults &AA, const std::set& constant_args, TargetLibraryInfo &TLI, bool differentialReturn, bool returnUsed) { - static std::map, bool/*differentialReturn*/, bool/*returnUsed*/>, std::pair> cachedfunctions; - static std::map, bool/*differentialReturn*/, bool/*returnUsed*/>, bool> cachedfinished; - auto tup = std::make_tuple(todiff, std::set(constant_args.begin(), constant_args.end()), differentialReturn, returnUsed); +std::pair CreateAugmentedPrimal(Function* todiff, AAResults &global_AA, const std::set& constant_args, TargetLibraryInfo &TLI, bool differentialReturn, bool returnUsed, const std::set _uncacheable_args) { + static std::map/*constant_args*/, std::set/*uncacheable_args*/, bool/*differentialReturn*/, bool/*returnUsed*/>, std::pair> cachedfunctions; + static std::map/*constant_args*/, std::set/*uncacheable_args*/, bool/*differentialReturn*/, bool/*returnUsed*/>, bool> cachedfinished; + auto tup = std::make_tuple(todiff, std::set(constant_args.begin(), constant_args.end()), std::set(_uncacheable_args.begin(), _uncacheable_args.end()), differentialReturn, returnUsed); if (cachedfunctions.find(tup) != cachedfunctions.end()) { return cachedfunctions[tup]; } @@ -104,15 +410,44 @@ std::pair CreateAugmentedPrimal(Function* todiff, AAResul //assert(st->getNumElements() > 0); return cachedfunctions[tup] = std::pair(foundcalled, nullptr); //dyn_cast(st->getElementType(0))); } + + + + + if (todiff->empty()) { llvm::errs() << *todiff << "\n"; } assert(!todiff->empty()); - + AAResults AA(TLI); GradientUtils *gutils = GradientUtils::CreateFromClone(todiff, AA, TLI, constant_args, /*returnValue*/returnUsed ? ReturnType::TapeAndReturns : ReturnType::Tape, /*differentialReturn*/differentialReturn); cachedfunctions[tup] = std::pair(gutils->newFunc, nullptr); cachedfinished[tup] = false; + std::map > uncacheable_args_map = + compute_uncacheable_args_for_callsites(gutils->oldFunc, gutils->DT, TLI, AA, gutils, _uncacheable_args); + + std::map can_modref_map = compute_uncacheable_load_map(gutils, AA, TLI, _uncacheable_args); + gutils->can_modref_map = &can_modref_map; + + // Allow forcing cache reads to be on or off using flags. + assert(!(cache_reads_always && cache_reads_never) && "Both cache_reads_always and cache_reads_never are true. This doesn't make sense."); + if (cache_reads_always || cache_reads_never) { + bool is_needed = cache_reads_always ? true : false; + for (auto iter = can_modref_map.begin(); iter != can_modref_map.end(); iter++) { + can_modref_map[iter->first] = is_needed; + } + } + + + //for (auto iter = can_modref_map.begin(); iter != can_modref_map.end(); iter++) { + // if (iter->second) { + // bool is_needed = is_load_needed_in_reverse(gutils, AA, iter->first); + // can_modref_map[iter->first] = is_needed; + // } + //} + + gutils->forceContexts(); gutils->forceAugmentedReturns(); @@ -364,7 +699,7 @@ std::pair CreateAugmentedPrimal(Function* todiff, AAResul } } - auto newcalled = CreateAugmentedPrimal(dyn_cast(called), AA, subconstant_args, TLI, /*differentialReturn*/subdifferentialreturn, /*return is used*/subretused).first; + auto newcalled = CreateAugmentedPrimal(dyn_cast(called), global_AA, subconstant_args, TLI, /*differentialReturn*/subdifferentialreturn, /*return is used*/subretused, uncacheable_args_map[op]).first; auto augmentcall = BuilderZ.CreateCall(newcalled, args); assert(augmentcall->getType()->isStructTy()); augmentcall->setCallingConv(op->getCallingConv()); @@ -395,20 +730,20 @@ std::pair CreateAugmentedPrimal(Function* todiff, AAResul gutils->addMalloc(BuilderZ, rv); } - if ((op->getType()->isPointerTy() || op->getType()->isIntegerTy()) && subdifferentialreturn) { + if ((op->getType()->isPointerTy() || op->getType()->isIntegerTy()) && gutils->invertedPointers.count(op) != 0) { auto placeholder = cast(gutils->invertedPointers[op]); if (I != E && placeholder == &*I) I++; gutils->invertedPointers.erase(op); - - assert(cast(augmentcall->getType())->getNumElements() == 3); - auto antiptr = cast(BuilderZ.CreateExtractValue(augmentcall, {2}, "antiptr_" + op->getName() )); - gutils->invertedPointers[rv] = antiptr; - placeholder->replaceAllUsesWith(antiptr); - - if (shouldCache) { - gutils->addMalloc(BuilderZ, antiptr); + if (subdifferentialreturn) { + assert(cast(augmentcall->getType())->getNumElements() == 3); + auto antiptr = cast(BuilderZ.CreateExtractValue(augmentcall, {2}, "antiptr_" + op->getName() )); + gutils->invertedPointers[rv] = antiptr; + placeholder->replaceAllUsesWith(antiptr); + + if (shouldCache) { + gutils->addMalloc(BuilderZ, antiptr); + } } - gutils->erase(placeholder); } else { if (cast(augmentcall->getType())->getNumElements() != 2) { @@ -422,12 +757,20 @@ std::pair CreateAugmentedPrimal(Function* todiff, AAResul } gutils->replaceAWithB(op,rv); + } else { + if ((op->getType()->isPointerTy() || op->getType()->isIntegerTy()) && gutils->invertedPointers.count(op) != 0) { + auto placeholder = cast(gutils->invertedPointers[op]); + if (I != E && placeholder == &*I) I++; + gutils->invertedPointers.erase(op); + gutils->erase(placeholder); + } + } gutils->erase(op); } else if(LoadInst* li = dyn_cast(inst)) { if (gutils->isConstantInstruction(inst) || gutils->isConstantValue(inst)) continue; - if (cachereads) { + if (can_modref_map[inst]) { llvm::errs() << "Forcibly caching reads " << *li << "\n"; IRBuilder<> BuilderZ(li); gutils->addMalloc(BuilderZ, li); @@ -901,7 +1244,8 @@ std::pair,SmallVector> getDefaultFunctionTypeForGr return std::pair,SmallVector>(args, outs); } -void handleGradientCallInst(BasicBlock::reverse_iterator &I, const BasicBlock::reverse_iterator &E, IRBuilder <>& Builder2, CallInst* op, DiffeGradientUtils* const gutils, TargetLibraryInfo &TLI, AAResults &AA, const bool topLevel, const std::map &replacedReturns) { +void handleGradientCallInst(BasicBlock::reverse_iterator &I, const BasicBlock::reverse_iterator &E, IRBuilder <>& Builder2, CallInst* op, DiffeGradientUtils* const gutils, TargetLibraryInfo &TLI, AAResults &AA, AAResults & global_AA, const bool topLevel, const std::map &replacedReturns, std::set uncacheable_args) { + llvm::errs() << "HandleGradientCall " << *op << "\n"; Function *called = op->getCalledFunction(); if (auto castinst = dyn_cast(op->getCalledValue())) { @@ -1111,6 +1455,8 @@ void handleGradientCallInst(BasicBlock::reverse_iterator &I, const BasicBlock::r ModRefInfo mri = ModRefInfo::NoModRef; if (iter->mayReadOrWriteMemory()) { + llvm::errs() << "Iter is at " << *iter << "\n"; + llvm::errs() << "origop is at " << *origop << "\n"; mri = AA.getModRefInfo(&*iter, origop); } @@ -1242,7 +1588,7 @@ void handleGradientCallInst(BasicBlock::reverse_iterator &I, const BasicBlock::r if (modifyPrimal && called) { bool subretused = op->getNumUses() != 0; bool subdifferentialreturn = (!gutils->isConstantValue(op)) && subretused; - auto fnandtapetype = CreateAugmentedPrimal(cast(called), AA, subconstant_args, TLI, /*differentialReturns*/subdifferentialreturn, /*return is used*/subretused); + auto fnandtapetype = CreateAugmentedPrimal(cast(called), global_AA, subconstant_args, TLI, /*differentialReturns*/subdifferentialreturn, /*return is used*/subretused, uncacheable_args); if (topLevel) { Function* newcalled = fnandtapetype.first; augmentcall = BuilderZ.CreateCall(newcalled, pre_args); @@ -1314,7 +1660,7 @@ void handleGradientCallInst(BasicBlock::reverse_iterator &I, const BasicBlock::r bool subdiffereturn = (!gutils->isConstantValue(op)) && !( op->getType()->isPointerTy() || op->getType()->isIntegerTy() || op->getType()->isEmptyTy() ); llvm::errs() << "subdifferet:" << subdiffereturn << " " << *op << "\n"; if (called) { - newcalled = CreatePrimalAndGradient(cast(called), subconstant_args, TLI, AA, /*returnValue*/retUsed, /*subdiffereturn*/subdiffereturn, /*topLevel*/replaceFunction, tape ? tape->getType() : nullptr);//, LI, DT); + newcalled = CreatePrimalAndGradient(cast(called), subconstant_args, TLI, global_AA, /*returnValue*/retUsed, /*subdiffereturn*/subdiffereturn, /*topLevel*/replaceFunction, tape ? tape->getType() : nullptr, uncacheable_args);//, LI, DT); } else { newcalled = gutils->invertPointerM(op->getCalledValue(), Builder2); auto ft = cast(cast(op->getCalledValue()->getType())->getElementType()); @@ -1424,7 +1770,7 @@ void handleGradientCallInst(BasicBlock::reverse_iterator &I, const BasicBlock::r } } -Function* CreatePrimalAndGradient(Function* todiff, const std::set& constant_args, TargetLibraryInfo &TLI, AAResults &AA, bool returnValue, bool differentialReturn, bool topLevel, llvm::Type* additionalArg) { +Function* CreatePrimalAndGradient(Function* todiff, const std::set& constant_args, TargetLibraryInfo &TLI, AAResults &global_AA, bool returnValue, bool differentialReturn, bool topLevel, llvm::Type* additionalArg, std::set _uncacheable_args) { if (differentialReturn) { if(!todiff->getReturnType()->isFPOrFPVectorTy()) { llvm::errs() << *todiff << "\n"; @@ -1436,13 +1782,17 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co llvm::errs() << "addl arg: " << *additionalArg << "\n"; } if (additionalArg) assert(additionalArg->isStructTy()); - - static std::map, bool/*retval*/, bool/*differentialReturn*/, bool/*topLevel*/, llvm::Type*>, Function*> cachedfunctions; - auto tup = std::make_tuple(todiff, std::set(constant_args.begin(), constant_args.end()), returnValue, differentialReturn, topLevel, additionalArg); + static std::map/*constant_args*/, std::set/*uncacheable_args*/, bool/*retval*/, bool/*differentialReturn*/, bool/*topLevel*/, llvm::Type*>, Function*> cachedfunctions; + auto tup = std::make_tuple(todiff, std::set(constant_args.begin(), constant_args.end()), std::set(_uncacheable_args.begin(), _uncacheable_args.end()), returnValue, differentialReturn, topLevel, additionalArg); if (cachedfunctions.find(tup) != cachedfunctions.end()) { return cachedfunctions[tup]; } + + + + bool hasTape = false; + if (constant_args.size() == 0 && !topLevel && !returnValue && hasMetadata(todiff, "enzyme_gradient")) { auto md = todiff->getMetadata("enzyme_gradient"); @@ -1458,7 +1808,6 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co auto res = getDefaultFunctionTypeForGradient(todiff->getFunctionType(), /*has return value*/!todiff->getReturnType()->isVoidTy(), differentialReturn); - bool hasTape = false; if (foundcalled->arg_size() == res.first.size() + 1 /*tape*/) { auto lastarg = foundcalled->arg_end(); @@ -1526,10 +1875,37 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co auto M = todiff->getParent(); auto& Context = M->getContext(); - + AAResults AA(TLI); DiffeGradientUtils *gutils = DiffeGradientUtils::CreateFromClone(todiff, AA, TLI, constant_args, returnValue ? ReturnType::ArgsWithReturn : ReturnType::Args, differentialReturn, additionalArg); cachedfunctions[tup] = gutils->newFunc; + std::map > uncacheable_args_map = + compute_uncacheable_args_for_callsites(gutils->oldFunc, gutils->DT, TLI, AA, gutils, _uncacheable_args); + + std::map can_modref_map; + // NOTE(TFK): Sanity check this decision. + // Is it always possibly to recompute the result of loads at top level? + can_modref_map = compute_uncacheable_load_map(gutils, AA, TLI, _uncacheable_args); + if (topLevel) { + for (auto iter = can_modref_map.begin(); iter != can_modref_map.end(); iter++) { + if (iter->second) { + bool is_needed = is_load_needed_in_reverse(gutils, AA, iter->first); + can_modref_map[iter->first] = is_needed; + } + } + } + + // Allow forcing cache reads to be on or off using flags. + assert(!(cache_reads_always && cache_reads_never) && "Both cache_reads_always and cache_reads_never are true. This doesn't make sense."); + if (cache_reads_always || cache_reads_never) { + bool is_needed = cache_reads_always ? true : false; + for (auto iter = can_modref_map.begin(); iter != can_modref_map.end(); iter++) { + can_modref_map[iter->first] = is_needed; + } + } + + gutils->can_modref_map = &can_modref_map; + gutils->forceContexts(true); gutils->forceAugmentedReturns(); @@ -1602,7 +1978,6 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co } } - for(BasicBlock* BB: gutils->originalBlocks) { auto BB2 = gutils->reverseBlocks[BB]; assert(BB2); @@ -1648,6 +2023,8 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co assert(0 && "unknown terminator inst"); } + + for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;) { Instruction* inst = &*I; assert(inst); @@ -1696,6 +2073,7 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co break; } default: + //continue; // NOTE(TFK) added this. assert(op); llvm::errs() << *gutils->newFunc << "\n"; llvm::errs() << "cannot handle unknown binary operator: " << *op << "\n"; @@ -1932,7 +2310,7 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co if (dif0) addToDiffe(op->getOperand(0), dif0); if (dif1) addToDiffe(op->getOperand(1), dif1); } else if(auto op = dyn_cast_or_null(inst)) { - handleGradientCallInst(I, E, Builder2, op, gutils, TLI, AA, topLevel, replacedReturns); + handleGradientCallInst(I, E, Builder2, op, gutils, TLI, global_AA, global_AA, topLevel, replacedReturns, uncacheable_args_map[op]); } else if(auto op = dyn_cast_or_null(inst)) { if (gutils->isConstantValue(inst)) continue; if (op->getType()->isPointerTy()) continue; @@ -1949,15 +2327,12 @@ Function* CreatePrimalAndGradient(Function* todiff, const std::set& co if (dif1) addToDiffe(op->getOperand(1), dif1); if (dif2) addToDiffe(op->getOperand(2), dif2); } else if(auto op = dyn_cast(inst)) { - if (gutils->isConstantValue(inst)) continue; - - + if (gutils->isConstantValue(inst) || gutils->isConstantInstruction(inst)) continue; auto op_operand = op->getPointerOperand(); auto op_type = op->getType(); - if (cachereads) { - llvm::errs() << "Forcibly loading cached reads " << *op << "\n"; + if (can_modref_map[inst]) { IRBuilder<> BuilderZ(op->getNextNode()); inst = cast(gutils->addMalloc(BuilderZ, inst)); if (inst != op) { diff --git a/enzyme/Enzyme/EnzymeLogic.h b/enzyme/Enzyme/EnzymeLogic.h index ac65e77344329..ec54b19e4b773 100644 --- a/enzyme/Enzyme/EnzymeLogic.h +++ b/enzyme/Enzyme/EnzymeLogic.h @@ -36,6 +36,6 @@ extern llvm::cl::opt enzyme_print; //! return structtype if recursive function std::pair CreateAugmentedPrimal(llvm::Function* todiff, llvm::AAResults &AA, const std::set& constant_args, llvm::TargetLibraryInfo &TLI, bool differentialReturn); -llvm::Function* CreatePrimalAndGradient(llvm::Function* todiff, const std::set& constant_args, llvm::TargetLibraryInfo &TLI, llvm::AAResults &AA, bool returnValue, bool differentialReturn, bool topLevel, llvm::Type* additionalArg); +llvm::Function* CreatePrimalAndGradient(llvm::Function* todiff, const std::set& constant_args, llvm::TargetLibraryInfo &TLI, llvm::AAResults &AA, bool returnValue, bool differentialReturn, bool topLevel, llvm::Type* additionalArg, std::set volatile_args); #endif diff --git a/enzyme/Enzyme/FunctionUtils.cpp b/enzyme/Enzyme/FunctionUtils.cpp index 38d9a177a4d43..cf233f0f32473 100644 --- a/enzyme/Enzyme/FunctionUtils.cpp +++ b/enzyme/Enzyme/FunctionUtils.cpp @@ -164,8 +164,13 @@ PHINode* canonicalizeIVs(fake::SCEVExpander &e, Type *Ty, Loop *L, DominatorTree Function* preprocessForClone(Function *F, AAResults &AA, TargetLibraryInfo &TLI) { static std::map cache; - if (cache.find(F) != cache.end()) return cache[F]; - + static std::map cache_AA; + llvm::errs() << "Before cache lookup for " << F->getName() << "\n"; + if (cache.find(F) != cache.end()) { + AA.addAAResult(*(cache_AA[F])); + return cache[F]; + } + llvm::errs() << "Did not do cache lookup for " << F->getName() << "\n"; Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(), "preprocess_" + F->getName(), F->getParent()); ValueToValueMapTy VMap; @@ -439,7 +444,7 @@ Function* preprocessForClone(Function *F, AAResults &AA, TargetLibraryInfo &TLI) FunctionAnalysisManager AM; AM.registerPass([] { return AAManager(); }); AM.registerPass([] { return ScalarEvolutionAnalysis(); }); - AM.registerPass([] { return AssumptionAnalysis(); }); + //AM.registerPass([] { return AssumptionAnalysis(); }); AM.registerPass([] { return TargetLibraryAnalysis(); }); AM.registerPass([] { return TargetIRAnalysis(); }); AM.registerPass([] { return LoopAnalysis(); }); @@ -458,13 +463,22 @@ Function* preprocessForClone(Function *F, AAResults &AA, TargetLibraryInfo &TLI) MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(AM); }); //Alias analysis is necessary to ensure can query whether we can move a forward pass function - BasicAA ba; - auto baa = new BasicAAResult(ba.run(*NewF, AM)); + //BasicAA ba; + //auto baa = new BasicAAResult(ba.run(*NewF, AM)); + AssumptionCache* AC = new AssumptionCache(*NewF); + TargetLibraryInfo* TLI = new TargetLibraryInfo(AM.getResult(*NewF)); + auto baa = new BasicAAResult(NewF->getParent()->getDataLayout(), + *NewF, + *TLI, + *AC, + &AM.getResult(*NewF), + AM.getCachedResult(*NewF), + AM.getCachedResult(*NewF)); + cache_AA[F] = baa; AA.addAAResult(*baa); - - ScopedNoAliasAA sa; - auto saa = new ScopedNoAliasAAResult(sa.run(*NewF, AM)); - AA.addAAResult(*saa); + //ScopedNoAliasAA sa; + //auto saa = new ScopedNoAliasAAResult(sa.run(*NewF, AM)); + //AA.addAAResult(*saa); } diff --git a/enzyme/Enzyme/GradientUtils.cpp b/enzyme/Enzyme/GradientUtils.cpp index 6157e2601ec78..49d5bb73ee08a 100644 --- a/enzyme/Enzyme/GradientUtils.cpp +++ b/enzyme/Enzyme/GradientUtils.cpp @@ -351,7 +351,8 @@ Value* GradientUtils::invertPointerM(Value* val, IRBuilder<>& BuilderM) { return invertedPointers[val] = cs; } else if (auto fn = dyn_cast(val)) { //! Todo allow tape propagation - auto newf = CreatePrimalAndGradient(fn, /*constant_args*/{}, TLI, AA, /*returnValue*/false, /*differentialReturn*/fn->getReturnType()->isFPOrFPVectorTy(), /*topLevel*/false, /*additionalArg*/nullptr); + std::set uncacheable_args; + auto newf = CreatePrimalAndGradient(fn, /*constant_args*/{}, TLI, AA, /*returnValue*/false, /*differentialReturn*/fn->getReturnType()->isFPOrFPVectorTy(), /*topLevel*/false, /*additionalArg*/nullptr, uncacheable_args); return BuilderM.CreatePointerCast(newf, fn->getType()); } else if (auto arg = dyn_cast(val)) { auto result = BuilderM.CreateCast(arg->getOpcode(), invertPointerM(arg->getOperand(0), BuilderM), arg->getDestTy(), arg->getName()+"'ipc"); @@ -824,10 +825,12 @@ Value* GradientUtils::lookupM(Value* val, IRBuilder<>& BuilderM) { } } - if (!shouldRecompute(inst, available)) { - auto op = unwrapM(inst, BuilderM, available, /*lookupIfAble*/true); - assert(op); - return op; + if (!(*(this->can_modref_map))[inst]) { + if (!shouldRecompute(inst, available)) { + auto op = unwrapM(inst, BuilderM, available, /*lookupIfAble*/true); + assert(op); + return op; + } } /* if (!inLoop) { diff --git a/enzyme/Enzyme/GradientUtils.h b/enzyme/Enzyme/GradientUtils.h index 919fb73df8e48..ffa9207c80b00 100644 --- a/enzyme/Enzyme/GradientUtils.h +++ b/enzyme/Enzyme/GradientUtils.h @@ -89,6 +89,9 @@ class GradientUtils { ValueToValueMapTy scopeFrees; ValueToValueMapTy originalToNewFn; + std::map* can_modref_map; + + Value* getNewFromOriginal(Value* originst) { assert(originst); auto f = originalToNewFn.find(originst); @@ -507,7 +510,7 @@ class GradientUtils { } assert(lastScopeAlloc.find(malloc) == lastScopeAlloc.end()); cast(malloc)->replaceAllUsesWith(ret); - auto n = malloc->getName(); + std::string n = malloc->getName().str(); erase(cast(malloc)); ret->setName(n); } diff --git a/enzyme/functional_tests_c/Makefile b/enzyme/functional_tests_c/Makefile index 8d1c98051e0d1..310affbe33427 100644 --- a/enzyme/functional_tests_c/Makefile +++ b/enzyme/functional_tests_c/Makefile @@ -18,7 +18,7 @@ OBJ := $(wildcard *.c) all: $(patsubst %.c,build/%-enzyme0,$(OBJ)) $(patsubst %.c,build/%-enzyme1,$(OBJ)) $(patsubst %.c,build/%-enzyme2,$(OBJ)) $(patsubst %.c,build/%-enzyme3,$(OBJ)) -POST_ENZYME_FLAGS := -mem2reg -sroa -adce -simplifycfg -enzyme_cachereads=true +POST_ENZYME_FLAGS := -mem2reg -sroa -adce -simplifycfg #all: $(patsubst %.c,build/%-enzyme1,$(OBJ)) $(patsubst %.c,build/%-enzyme2,$(OBJ)) $(patsubst %.c,build/%-enzyme3,$(OBJ)) #clean: @@ -31,7 +31,7 @@ POST_ENZYME_FLAGS := -mem2reg -sroa -adce -simplifycfg -enzyme_cachereads=true #EXTRA_FLAGS = -indvars -loop-simplify -loop-rotate -# NOTE(TFK): Optimization level 0 is broken right now. +# /efs/home/tfk/valgrind-3.12.0/vg-in-place build/%-enzyme0: %.c @./setup.sh $(CLANG_BIN_PATH)/clang -std=c11 -O1 $(patsubst %.c,%,$<).c -S -emit-llvm -o $@.ll @./setup.sh $(CLANG_BIN_PATH)/opt $@.ll $(EXTRA_FLAGS) -load=$(ENZYME_PLUGIN) -enzyme $(POST_ENZYME_FLAGS) -o $@.bc diff --git a/enzyme/functional_tests_c/insertsort_sum.c b/enzyme/functional_tests_c/insertsort_sum.c index 875bf620077c1..c5e7cd33d3a0f 100644 --- a/enzyme/functional_tests_c/insertsort_sum.c +++ b/enzyme/functional_tests_c/insertsort_sum.c @@ -16,10 +16,8 @@ float* unsorted_array_init(int N) { return arr; } -// sums the first half of a sorted array. -void insertsort_sum (float* array, int N, float* ret) { +void insertsort_sum (float*__restrict array, int N, float*__restrict ret) { float sum = 0; - //qsort(array, N, sizeof(float), cmp); for (int i = 1; i < N; i++) { int j = i; @@ -31,30 +29,16 @@ void insertsort_sum (float* array, int N, float* ret) { } } - for (int i = 0; i < N/2; i++) { - printf("Val: %f\n", array[i]); + //printf("Val: %f\n", array[i]); sum += array[i]; } + *ret = sum; } - - int main(int argc, char** argv) { - - - - float a = 2.0; - float b = 3.0; - - - - float da = 0; - float db = 0; - - float ret = 0; float dret = 1.0; @@ -71,18 +55,15 @@ int main(int argc, char** argv) { printf("%d:%f\n", i, array[i]); } - //insertsort_sum(array, N, &ret); + __builtin_autodiff(insertsort_sum, array, d_array, N, &ret, &dret); + + printf("The total sum is %f\n", ret); printf("Array after sorting:\n"); for (int i = 0; i < N; i++) { printf("%d:%f\n", i, array[i]); } - - printf("The total sum is %f\n", ret); - - __builtin_autodiff(insertsort_sum, array, d_array, N, &ret, &dret); - for (int i = 0; i < N; i++) { printf("Diffe for index %d is %f\n", i, d_array[i]); if (i%2 == 0) { @@ -91,13 +72,5 @@ int main(int argc, char** argv) { assert(d_array[i] == 1.0); } } - - //__builtin_autodiff(compute_loops, &a, &da, &b, &db, &ret, &dret); - - - //assert(da == 100*1.0f); - //assert(db == 100*1.0f); - - //printf("hello! %f, res2 %f, da: %f, db: %f\n", ret, ret, da,db); return 0; } diff --git a/enzyme/functional_tests_c/insertsort_sum_alt.c b/enzyme/functional_tests_c/insertsort_sum_alt.c index 10cee35434ba5..944804b6b2715 100644 --- a/enzyme/functional_tests_c/insertsort_sum_alt.c +++ b/enzyme/functional_tests_c/insertsort_sum_alt.c @@ -35,7 +35,7 @@ void insertion_sort_inner(float* array, int i) { } // sums the first half of a sorted array. -void insertsort_sum (float* array, int N, float* ret) { +void insertsort_sum (float*__restrict array, int N, float*__restrict ret) { float sum = 0; //qsort(array, N, sizeof(float), cmp); @@ -45,7 +45,7 @@ void insertsort_sum (float* array, int N, float* ret) { for (int i = 0; i < N/2; i++) { - printf("Val: %f\n", array[i]); + //printf("Val: %f\n", array[i]); sum += array[i]; } *ret = sum; diff --git a/enzyme/functional_tests_c/readwriteread.c b/enzyme/functional_tests_c/readwriteread.c new file mode 100644 index 0000000000000..355c632190a23 --- /dev/null +++ b/enzyme/functional_tests_c/readwriteread.c @@ -0,0 +1,46 @@ +#include +#include +#include +#include +#define __builtin_autodiff __enzyme_autodiff +double __enzyme_autodiff(void*, ...); + +double f_read(double* x) { + double product = (*x) * (*x); + return product; +} + +void g_write(double* x, double product) { + *x = (*x) * product; +} + +double h_read(double* x) { + return *x; +} + +double readwriteread_helper(double* x) { + double product = f_read(x); + g_write(x, product); + double ret = h_read(x); + return ret; +} + +void readwriteread(double*__restrict x, double*__restrict ret) { + *ret = readwriteread_helper(x); +} + +int main(int argc, char** argv) { + double ret = 0; + double dret = 1.0; + double* x = (double*) malloc(sizeof(double)); + double* dx = (double*) malloc(sizeof(double)); + *x = 2.0; + *dx = 0.0; + + __builtin_autodiff(readwriteread, x, dx, &ret, &dret); + + + printf("dx is %f ret is %f\n", *dx, ret); + assert(*dx == 3*2.0*2.0); + return 0; +} diff --git a/enzyme/functional_tests_c/setup.sh b/enzyme/functional_tests_c/setup.sh index 98be63a09e0d5..c5c86df2fbb95 100755 --- a/enzyme/functional_tests_c/setup.sh +++ b/enzyme/functional_tests_c/setup.sh @@ -1,8 +1,8 @@ #!/bin/bash # NOTE(TFK): Uncomment for local testing. -export CLANG_BIN_PATH=./../../build-dbg/bin -export ENZYME_PLUGIN=./../mkdebug/Enzyme/LLVMEnzyme-7.so +export CLANG_BIN_PATH=./../../llvm/build/bin/ +export ENZYME_PLUGIN=./../build/Enzyme/LLVMEnzyme-7.so mkdir -p build $@ diff --git a/enzyme/functional_tests_c/testfiles/readwriteread-enzyme0.test b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme0.test new file mode 100644 index 0000000000000..14a037d8426bd --- /dev/null +++ b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme0.test @@ -0,0 +1,6 @@ +; RUN: cd %desired_wd +; RUN: make clean-readwriteread-enzyme0 ENZYME_PLUGIN=%loadEnzyme +; RUN: make build/readwriteread-enzyme0 ENZYME_PLUGIN=%loadEnzyme CLANG_BIN_PATH=%clangBinPath +; RUN: build/readwriteread-enzyme0 +; RUN: make clean-readwriteread-enzyme0 ENZYME_PLUGIN=%loadEnzyme + diff --git a/enzyme/functional_tests_c/testfiles/readwriteread-enzyme1.test b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme1.test new file mode 100644 index 0000000000000..9dc3174b8435e --- /dev/null +++ b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme1.test @@ -0,0 +1,6 @@ +; RUN: cd %desired_wd +; RUN: make clean-readwriteread-enzyme1 ENZYME_PLUGIN=%loadEnzyme +; RUN: make build/readwriteread-enzyme1 ENZYME_PLUGIN=%loadEnzyme CLANG_BIN_PATH=%clangBinPath +; RUN: build/readwriteread-enzyme1 +; RUN: make clean-readwriteread-enzyme1 ENZYME_PLUGIN=%loadEnzyme + diff --git a/enzyme/functional_tests_c/testfiles/readwriteread-enzyme2.test b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme2.test new file mode 100644 index 0000000000000..e03f5242726c6 --- /dev/null +++ b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme2.test @@ -0,0 +1,6 @@ +; RUN: cd %desired_wd +; RUN: make clean-readwriteread-enzyme2 ENZYME_PLUGIN=%loadEnzyme +; RUN: make build/readwriteread-enzyme2 ENZYME_PLUGIN=%loadEnzyme CLANG_BIN_PATH=%clangBinPath +; RUN: build/readwriteread-enzyme2 +; RUN: make clean-readwriteread-enzyme2 ENZYME_PLUGIN=%loadEnzyme + diff --git a/enzyme/functional_tests_c/testfiles/readwriteread-enzyme3.test b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme3.test new file mode 100644 index 0000000000000..40efc5f2c7e7d --- /dev/null +++ b/enzyme/functional_tests_c/testfiles/readwriteread-enzyme3.test @@ -0,0 +1,6 @@ +; RUN: cd %desired_wd +; RUN: make clean-readwriteread-enzyme3 ENZYME_PLUGIN=%loadEnzyme +; RUN: make build/readwriteread-enzyme3 ENZYME_PLUGIN=%loadEnzyme CLANG_BIN_PATH=%clangBinPath +; RUN: build/readwriteread-enzyme3 +; RUN: make clean-readwriteread-enzyme3 ENZYME_PLUGIN=%loadEnzyme + diff --git a/enzyme/test/Enzyme/badcall.ll b/enzyme/test/Enzyme/badcall.ll index 9672654917b29..15518f2ebd1da 100644 --- a/enzyme/test/Enzyme/badcall.ll +++ b/enzyme/test/Enzyme/badcall.ll @@ -42,11 +42,12 @@ attributes #1 = { noinline nounwind uwtable } ; CHECK: define internal {{(dso_local )?}}{} @diffef(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = call { { {} } } @augmented_subf(double* %x, double* %"x'") -; CHECK-NEXT: store double 2.000000e+00, double* %x, align 8 -; CHECK-NEXT: store double 0.000000e+00, double* %"x'" -; CHECK-NEXT: %1 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {} } undef) -; CHECK-NEXT: ret {} undef +; CHECK-NEXT: %0 = call { { {}, double } } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %1 = extractvalue { { {}, double } } %0, 0 +; CHECK-NEXT: store double 2.000000e+00, double* %x, align 8 +; CHECK-NEXT: store double 0.000000e+00, double* %"x'" +; CHECK-NEXT: %2 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, double } %1) +; CHECK-NEXT: ret {} undef ; CHECK-NEXT: } ; CHECK: define internal {{(dso_local )?}}{ {} } @augmented_metasubf(double* nocapture %x, double* %"x'") @@ -56,16 +57,21 @@ attributes #1 = { noinline nounwind uwtable } ; CHECK-NEXT: ret { {} } undef ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{ { {} } } @augmented_subf(double* nocapture %x, double* %"x'") +; CHECK: define internal {{(dso_local )?}}{ { {}, double } } @augmented_subf(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load double, double* %x, align 8 -; CHECK-NEXT: %mul = fmul fast double %0, 2.000000e+00 -; CHECK-NEXT: store double %mul, double* %x, align 8 -; CHECK-NEXT: %1 = call { {} } @augmented_metasubf(double* %x, double* %"x'") -; CHECK-NEXT: ret { { {} } } undef +; CHECK-NEXT: %0 = alloca { { {}, double } } +; CHECK-NEXT: %1 = getelementptr { { {}, double } }, { { {}, double } }* %0, i32 0, i32 0 +; CHECK-NEXT: %2 = load double, double* %x, align 8 +; CHECK-NEXT: %3 = getelementptr { {}, double }, { {}, double }* %1, i32 0, i32 1 +; CHECK-NEXT: store double %2, double* %3 +; CHECK-NEXT: %mul = fmul fast double %2, 2.000000e+00 +; CHECK-NEXT: store double %mul, double* %x, align 8 +; CHECK-NEXT: %4 = call { {} } @augmented_metasubf(double* %x, double* %"x'") +; CHECK-NEXT: %5 = load { { {}, double } }, { { {}, double } }* %0 +; CHECK-NEXT: ret { { {}, double } } %5 ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {} } %tapeArg) +; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, double } %tapeArg) ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = call {} @diffemetasubf(double* %x, double* %"x'", {} undef) ; CHECK-NEXT: %1 = load double, double* %"x'" diff --git a/enzyme/test/Enzyme/badcall2.ll b/enzyme/test/Enzyme/badcall2.ll index 10a46708f25f4..0f47f7f1435ea 100644 --- a/enzyme/test/Enzyme/badcall2.ll +++ b/enzyme/test/Enzyme/badcall2.ll @@ -50,10 +50,11 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK: define internal {{(dso_local )?}}{} @diffef(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = call { { {}, {} } } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %0 = call { { {}, {}, double } } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %1 = extractvalue { { {}, {}, double } } %0, 0 ; CHECK-NEXT: store double 2.000000e+00, double* %x, align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"x'", align 8 -; CHECK-NEXT: %1 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, {} } undef) +; CHECK-NEXT: %2 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, {}, double } %1) ; CHECK-NEXT: ret {} undef ; CHECK-NEXT: } @@ -71,17 +72,23 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK-NEXT: ret { {} } undef ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{ { {}, {} } } @augmented_subf(double* nocapture %x, double* %"x'") +; CHECK: define internal {{(dso_local )?}}{ { {}, {}, double } } @augmented_subf(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load double, double* %x, align 8 -; CHECK-NEXT: %mul = fmul fast double %0, 2.000000e+00 +; CHECK-NEXT: %0 = alloca { { {}, {}, double } } +; CHECK-NEXT: %1 = getelementptr { { {}, {}, double } }, { { {}, {}, double } }* %0, i32 0, i32 0 +; CHECK-NEXT: %2 = load double, double* %x, align 8 +; CHECK-NEXT: %3 = getelementptr { {}, {}, double }, { {}, {}, double }* %1, i32 0, i32 2 +; CHECK-NEXT: store double %2, double* %3 +; CHECK-NEXT: %mul = fmul fast double %2, 2.000000e+00 ; CHECK-NEXT: store double %mul, double* %x, align 8 -; CHECK-NEXT: %1 = call { {} } @augmented_metasubf(double* %x, double* %"x'") -; CHECK-NEXT: %2 = call { {} } @augmented_othermetasubf(double* %x, double* %"x'") -; CHECK-NEXT: ret { { {}, {} } } undef +; CHECK-NEXT: %4 = call { {} } @augmented_metasubf(double* %x, double* %"x'") +; CHECK-NEXT: %5 = call { {} } @augmented_othermetasubf(double* %x, double* %"x'") +; CHECK-NEXT: %6 = load { { {}, {}, double } }, { { {}, {}, double } }* %0 +; CHECK-NEXT: ret { { {}, {}, double } } %6 ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, {} } %tapeArg) + +; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, {}, double } %tapeArg) ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = call {} @diffeothermetasubf(double* %x, double* %"x'", {} undef) ; CHECK-NEXT: %1 = call {} @diffemetasubf(double* %x, double* %"x'", {} undef) diff --git a/enzyme/test/Enzyme/badcall3.ll b/enzyme/test/Enzyme/badcall3.ll index 86fb9083359b6..0d0b936da2fe3 100644 --- a/enzyme/test/Enzyme/badcall3.ll +++ b/enzyme/test/Enzyme/badcall3.ll @@ -50,10 +50,11 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK: define internal {{(dso_local )?}}{} @diffef(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = call { { {}, {} } } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %0 = call { { {}, {}, double } } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %1 = extractvalue { { {}, {}, double } } %0, 0 ; CHECK-NEXT: store double 2.000000e+00, double* %x, align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"x'", align 8 -; CHECK-NEXT: %1 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, {} } undef) +; CHECK-NEXT: %2 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, {}, double } %1) ; CHECK-NEXT: ret {} undef ; CHECK-NEXT: } @@ -71,17 +72,23 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK-NEXT: ret { {} } undef ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{ { {}, {} } } @augmented_subf(double* nocapture %x, double* %"x'") +; CHECK: define internal {{(dso_local )?}}{ { {}, {}, double } } @augmented_subf(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load double, double* %x, align 8 -; CHECK-NEXT: %mul = fmul fast double %0, 2.000000e+00 -; CHECK-NEXT: store double %mul, double* %x, align 8 -; CHECK-NEXT: %1 = call { {} } @augmented_metasubf(double* %x, double* %"x'") -; CHECK-NEXT: %2 = call { {} } @augmented_othermetasubf(double* %x, double* %"x'") -; CHECK-NEXT: ret { { {}, {} } } undef +; CHECK-NEXT: %0 = alloca { { {}, {}, double } } +; CHECK-NEXT: %1 = getelementptr { { {}, {}, double } }, { { {}, {}, double } }* %0, i32 0, i32 0 +; CHECK-NEXT: %2 = load double, double* %x, align 8 +; CHECK-NEXT: %3 = getelementptr { {}, {}, double }, { {}, {}, double }* %1, i32 0, i32 2 +; CHECK-NEXT: store double %2, double* %3 +; CHECK-NEXT: %mul = fmul fast double %2, 2.000000e+00 +; CHECK-NEXT: store double %mul, double* %x, align 8 +; CHECK-NEXT: %4 = call { {} } @augmented_metasubf(double* %x, double* %"x'") +; CHECK-NEXT: %5 = call { {} } @augmented_othermetasubf(double* %x, double* %"x'") +; CHECK-NEXT: %6 = load { { {}, {}, double } }, { { {}, {}, double } }* %0 +; CHECK-NEXT: ret { { {}, {}, double } } %6 ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, {} } %tapeArg) + +; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, {}, double } %tapeArg) ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = call {} @diffeothermetasubf(double* %x, double* %"x'", {} undef) ; CHECK-NEXT: %1 = call {} @diffemetasubf(double* %x, double* %"x'", {} undef) diff --git a/enzyme/test/Enzyme/badcall4.ll b/enzyme/test/Enzyme/badcall4.ll index b7183c501717f..b099fac3c2e92 100644 --- a/enzyme/test/Enzyme/badcall4.ll +++ b/enzyme/test/Enzyme/badcall4.ll @@ -51,11 +51,11 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK: define internal {{(dso_local )?}}{} @diffef(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = call { { {}, i1, {}, i1 } } @augmented_subf(double* %x, double* %"x'") -; CHECK-NEXT: %1 = extractvalue { { {}, i1, {}, i1 } } %0, 0 +; CHECK-NEXT: %0 = call { { {}, i1, {}, i1, double } } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %1 = extractvalue { { {}, i1, {}, i1, double } } %0, 0 ; CHECK-NEXT: store double 2.000000e+00, double* %x, align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"x'", align 8 -; CHECK-NEXT: %2 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, i1, {}, i1 } %1) +; CHECK-NEXT: %2 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, i1, {}, i1, double } %1) ; CHECK-NEXT: ret {} undef ; CHECK-NEXT: } @@ -63,7 +63,7 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK: define internal {{(dso_local )?}}{ {}, i1 } @augmented_metasubf(double* nocapture %x, double* %"x'") -; CHECK: define internal {{(dso_local )?}}{ { {}, i1, {}, i1 } } @augmented_subf(double* nocapture %x, double* %"x'") +; CHECK: define internal {{(dso_local )?}}{ { {}, i1, {}, i1, double } } @augmented_subf(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = load double, double* %x, align 8 ; CHECK-NEXT: %mul = fmul fast double %0, 2.000000e+00 @@ -72,12 +72,13 @@ declare dso_local double @__enzyme_autodiff(i8*, double*, double*) local_unnamed ; CHECK-NEXT: %2 = extractvalue { {}, i1 } %1, 1 ; CHECK-NEXT: %3 = call { {}, i1 } @augmented_othermetasubf(double* %x, double* %"x'") ; CHECK-NEXT: %4 = extractvalue { {}, i1 } %3, 1 -; CHECK-NEXT: %[[iv1:.+]] = insertvalue { { {}, i1, {}, i1 } } undef, i1 %4, 0, 1 -; CHECK-NEXT: %[[iv2:.+]] = insertvalue { { {}, i1, {}, i1 } } %[[iv1]], i1 %2, 0, 3 -; CHECK-NEXT: ret { { {}, i1, {}, i1 } } %[[iv2]] +; CHECK-NEXT: %.fca.0.1.insert = insertvalue { { {}, i1, {}, i1, double } } undef, i1 %4, 0, 1 +; CHECK-NEXT: %.fca.0.3.insert = insertvalue { { {}, i1, {}, i1, double } } %.fca.0.1.insert, i1 %2, 0, 3 +; CHECK-NEXT: %.fca.0.4.insert = insertvalue { { {}, i1, {}, i1, double } } %.fca.0.3.insert, double %0, 0, 4 +; CHECK-NEXT: ret { { {}, i1, {}, i1, double } } %.fca.0.4.insert ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, i1, {}, i1 } %tapeArg) +; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, i1, {}, i1, double } %tapeArg) ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = call {} @diffeothermetasubf(double* %x, double* %"x'", {} undef) ; CHECK-NEXT: %1 = call {} @diffemetasubf(double* %x, double* %"x'", {} undef) diff --git a/enzyme/test/Enzyme/badcallused.ll b/enzyme/test/Enzyme/badcallused.ll index 51f8b2b915edd..e39062a1751e3 100644 --- a/enzyme/test/Enzyme/badcallused.ll +++ b/enzyme/test/Enzyme/badcallused.ll @@ -43,12 +43,13 @@ attributes #1 = { noinline nounwind uwtable } ; CHECK: define internal {{(dso_local )?}}{} @diffef(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = call { { {} }, i1, i1 } @augmented_subf(double* %x, double* %"x'") -; CHECK-NEXT: %1 = extractvalue { { {} }, i1, i1 } %0, 1 -; CHECK-NEXT: %sel = select i1 %1, double 2.000000e+00, double 3.000000e+00 +; CHECK-NEXT: %0 = call { { {}, double }, i1, i1 } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %1 = extractvalue { { {}, double }, i1, i1 } %0, 0 +; CHECK-NEXT: %2 = extractvalue { { {}, double }, i1, i1 } %0, 1 +; CHECK-NEXT: %sel = select i1 %2, double 2.000000e+00, double 3.000000e+00 ; CHECK-NEXT: store double %sel, double* %x, align 8 -; CHECK-NEXT: store double 0.000000e+00, double* %"x'" -; CHECK-NEXT: %[[dsubf:.+]] = call {} @diffesubf(double* nonnull %x, double* %"x'", { {} } undef) +; CHECK-NEXT: store double 0.000000e+00, double* %"x'", align 8 +; CHECK-NEXT: %3 = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, double } %1) ; CHECK-NEXT: ret {} undef ; CHECK-NEXT: } @@ -65,24 +66,29 @@ attributes #1 = { noinline nounwind uwtable } ; CHECK-NEXT: ret { {}, i1, i1 } %3 ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{ { {} }, i1, i1 } @augmented_subf(double* nocapture %x, double* %"x'") +; CHECK: define internal {{(dso_local )?}}{ { {}, double }, i1, i1 } @augmented_subf(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = alloca { { {} }, i1, i1 } -; CHECK-NEXT: %1 = load double, double* %x, align 8 -; CHECK-NEXT: %mul = fmul fast double %1, 2.000000e+00 +; CHECK-NEXT: %0 = alloca { { {}, double }, i1, i1 } +; CHECK-NEXT: %1 = getelementptr { { {}, double }, i1, i1 } +; CHECK-NEXT: %2 = load double, double* %x, align 8 +; CHECK-NEXT: %3 = getelementptr { {}, double }, { {}, double }* %1, i32 0, i32 1 +; CHECK-NEXT: store double %2, double* %3 +; CHECK-NEXT: %mul = fmul fast double %2, 2.000000e+00 ; CHECK-NEXT: store double %mul, double* %x, align 8 -; CHECK-NEXT: %2 = call { {}, i1, i1 } @augmented_metasubf(double* %x, double* %"x'") -; CHECK-NEXT: %3 = extractvalue { {}, i1, i1 } %2, 1 -; CHECK-NEXT: %antiptr_call = extractvalue { {}, i1, i1 } %2, 2 -; CHECK-NEXT: %4 = getelementptr { { {} }, i1, i1 }, { { {} }, i1, i1 }* %0, i32 0, i32 1 -; CHECK-NEXT: store i1 %3, i1* %4 -; CHECK-NEXT: %5 = getelementptr { { {} }, i1, i1 }, { { {} }, i1, i1 }* %0, i32 0, i32 2 -; CHECK-NEXT: store i1 %antiptr_call, i1* %5 -; CHECK-NEXT: %[[toret:.+]] = load { { {} }, i1, i1 }, { { {} }, i1, i1 }* %0 -; CHECK-NEXT: ret { { {} }, i1, i1 } %[[toret]] +; CHECK-NEXT: %4 = call { {}, i1, i1 } @augmented_metasubf(double* %x, double* %"x'") +; CHECK-NEXT: %5 = extractvalue { {}, i1, i1 } %4, 1 +; CHECK-NEXT: %antiptr_call = extractvalue { {}, i1, i1 } %4, 2 + + +; CHECK-NEXT: %6 = getelementptr { { {}, double }, i1, i1 }, { { {}, double }, i1, i1 }* %0, i32 0, i32 1 +; CHECK-NEXT: store i1 %5, i1* %6 +; CHECK-NEXT: %7 = getelementptr { { {}, double }, i1, i1 }, { { {}, double }, i1, i1 }* %0, i32 0, i32 2 +; CHECK-NEXT: store i1 %antiptr_call, i1* %7 +; CHECK-NEXT: %[[toret:.+]] = load { { {}, double }, i1, i1 }, { { {}, double }, i1, i1 }* %0 +; CHECK-NEXT: ret { { {}, double }, i1, i1 } %[[toret]] ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {} } %tapeArg) +; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, double } %tapeArg) ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = call {} @diffemetasubf(double* %x, double* %"x'", {} undef) ; CHECK-NEXT: %1 = load double, double* %"x'" diff --git a/enzyme/test/Enzyme/badcallused2.ll b/enzyme/test/Enzyme/badcallused2.ll index 92069b003948f..0513dde7ad9f1 100644 --- a/enzyme/test/Enzyme/badcallused2.ll +++ b/enzyme/test/Enzyme/badcallused2.ll @@ -53,12 +53,13 @@ attributes #1 = { noinline nounwind uwtable } ; CHECK: define internal {{(dso_local )?}}{} @diffef(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = call { { {}, {} }, i1, i1 } @augmented_subf(double* %x, double* %"x'") -; CHECK-NEXT: %1 = extractvalue { { {}, {} }, i1, i1 } %0, 1 -; CHECK-NEXT: %sel = select i1 %1, double 2.000000e+00, double 3.000000e+00 +; CHECK-NEXT: %0 = call { { {}, {}, double }, i1, i1 } @augmented_subf(double* %x, double* %"x'") +; CHECK-NEXT: %1 = extractvalue { { {}, {}, double }, i1, i1 } %0, 0 +; CHECK-NEXT: %2 = extractvalue { { {}, {}, double }, i1, i1 } %0, 1 +; CHECK-NEXT: %sel = select i1 %2, double 2.000000e+00, double 3.000000e+00 ; CHECK-NEXT: store double %sel, double* %x, align 8 ; CHECK-NEXT: store double 0.000000e+00, double* %"x'" -; CHECK-NEXT: %[[dsubf:.+]] = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, {} } undef) +; CHECK-NEXT: %[[dsubf:.+]] = call {} @diffesubf(double* nonnull %x, double* %"x'", { {}, {}, double } %1) ; CHECK-NEXT: ret {} undef ; CHECK-NEXT: } @@ -82,25 +83,28 @@ attributes #1 = { noinline nounwind uwtable } ; CHECK-NEXT: ret { {} } undef ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{ { {}, {} }, i1, i1 } @augmented_subf(double* nocapture %x, double* %"x'") +; CHECK: define internal {{(dso_local )?}}{ { {}, {}, double }, i1, i1 } @augmented_subf(double* nocapture %x, double* %"x'") ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = alloca { { {}, {} }, i1, i1 } -; CHECK-NEXT: %1 = load double, double* %x, align 8 -; CHECK-NEXT: %mul = fmul fast double %1, 2.000000e+00 +; CHECK-NEXT: %0 = alloca { { {}, {}, double }, i1, i1 } +; CHECK-NEXT: %1 = getelementptr { { {}, {}, double }, i1, i1 }, { { {}, {}, double }, i1, i1 }* %0, i32 0, i32 0 +; CHECK-NEXT: %2 = load double, double* %x, align 8 +; CHECK-NEXT: %3 = getelementptr { {}, {}, double }, { {}, {}, double }* %1, i32 0, i32 2 +; CHECK-NEXT: store double %2, double* %3 +; CHECK-NEXT: %mul = fmul fast double %2, 2.000000e+00 ; CHECK-NEXT: store double %mul, double* %x, align 8 -; CHECK-NEXT: %2 = call { {} } @augmented_omegasubf(double* %x, double* %"x'") -; CHECK-NEXT: %3 = call { {}, i1, i1 } @augmented_metasubf(double* %x, double* %"x'") -; CHECK-NEXT: %4 = extractvalue { {}, i1, i1 } %3, 1 -; CHECK-NEXT: %antiptr_call2 = extractvalue { {}, i1, i1 } %3, 2 -; CHECK-NEXT: %5 = getelementptr { { {}, {} }, i1, i1 }, { { {}, {} }, i1, i1 }* %0, i32 0, i32 1 -; CHECK-NEXT: store i1 %4, i1* %5 -; CHECK-NEXT: %6 = getelementptr { { {}, {} }, i1, i1 }, { { {}, {} }, i1, i1 }* %0, i32 0, i32 2 -; CHECK-NEXT: store i1 %antiptr_call2, i1* %6 -; CHECK-NEXT: %[[toret:.+]] = load { { {}, {} }, i1, i1 }, { { {}, {} }, i1, i1 }* %0 -; CHECK-NEXT: ret { { {}, {} }, i1, i1 } %[[toret]] +; CHECK-NEXT: %4 = call { {} } @augmented_omegasubf(double* %x, double* %"x'") +; CHECK-NEXT: %5 = call { {}, i1, i1 } @augmented_metasubf(double* %x, double* %"x'") +; CHECK-NEXT: %6 = extractvalue { {}, i1, i1 } %5, 1 +; CHECK-NEXT: %antiptr_call2 = extractvalue { {}, i1, i1 } %5, 2 +; CHECK-NEXT: %7 = getelementptr { { {}, {}, double }, i1, i1 }, { { {}, {}, double }, i1, i1 }* %0, i32 0, i32 1 +; CHECK-NEXT: store i1 %6, i1* %7 +; CHECK-NEXT: %8 = getelementptr { { {}, {}, double }, i1, i1 }, { { {}, {}, double }, i1, i1 }* %0, i32 0, i32 2 +; CHECK-NEXT: store i1 %antiptr_call2, i1* %8 +; CHECK-NEXT: %[[toret:.+]] = load { { {}, {}, double }, i1, i1 }, { { {}, {}, double }, i1, i1 }* %0 +; CHECK-NEXT: ret { { {}, {}, double }, i1, i1 } %[[toret]] ; CHECK-NEXT: } -; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, {} } %tapeArg) +; CHECK: define internal {{(dso_local )?}}{} @diffesubf(double* nocapture %x, double* %"x'", { {}, {}, double } %tapeArg) ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = call {} @diffemetasubf(double* %x, double* %"x'", {} undef) ; CHECK-NEXT: %1 = call {} @diffeomegasubf(double* %x, double* %"x'", {} undef)