diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index 7c0fa8d20..ec0b4989e 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -268,6 +270,28 @@ private: } block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); + // Get cache path + const auto cache_path = std::filesystem::path(conf.ir_cache_path) / (std::to_string(current_location.Value())+".ir"); + + // Load from disk cache + if (!conf.ir_cache_path.empty()) { + std::ifstream cache_file(cache_path, std::ios::binary); + if (cache_file) { + // Read entire file + std::vector data; + while (cache_file.read(reinterpret_cast(&data.emplace_back(0)), sizeof(data[0]))); + data.pop_back(); + cache_file.close(); + // Deserialize file + IR::Block ir_block(A64::LocationDescriptor{current_location}); + auto it = data.begin(); + ir_block.Deserialize(it); + ASSERT(!(it > data.end())); + ASSERT(!(it < data.end())); + return emitter.Emit(ir_block).entrypoint; + } + } + // JIT Compile const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, @@ -287,6 +311,16 @@ private: Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); } Optimization::VerificationPass(ir_block); + + // Store to disk cache if non-empty + if (!conf.ir_cache_path.empty() && !ir_block.empty()) { + std::ofstream cache_file(cache_path, std::ios::binary); + ASSERT_MSG(cache_file, "Failed to write cache file"); + std::vector data; + ir_block.Serialize(data); + cache_file.write(reinterpret_cast(data.data()), data.size()*sizeof(data[0])); + } + return emitter.Emit(ir_block).entrypoint; } diff --git a/externals/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp b/externals/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp index 7f9632347..05996aeb6 100644 --- a/externals/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp @@ -43,9 +43,6 @@ IR::Block Translate(LocationDescriptor descriptor, MemoryReadCodeFuncType memory ASSERT_MSG(block.HasTerminal(), "Terminal has not been set"); - std::vector fres; - block.Serialize(fres); - block.SetEndLocation(*visitor.ir.current_location); return block; diff --git a/externals/dynarmic/src/dynarmic/interface/A64/config.h b/externals/dynarmic/src/dynarmic/interface/A64/config.h index c8ed623eb..fa73f167b 100644 --- a/externals/dynarmic/src/dynarmic/interface/A64/config.h +++ b/externals/dynarmic/src/dynarmic/interface/A64/config.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include #include @@ -285,10 +286,13 @@ struct UserConfig { /// AddTicks and GetTicksRemaining are never called, and no cycle counting is done. bool enable_cycle_counting = true; - // Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host). - // Maximum size is limited by the maximum length of a x86_64 / arm64 jump. + /// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host). + /// Maximum size is limited by the maximum length of a x86_64 / arm64 jump. size_t code_cache_size = 128 * 1024 * 1024; // bytes + /// IR cache location, disabled if empty. Must be directory that exists. + std::string ir_cache_path; + /// Internal use only bool very_verbose_debugging_output = false; }; diff --git a/externals/dynarmic/src/dynarmic/ir/basic_block.cpp b/externals/dynarmic/src/dynarmic/ir/basic_block.cpp index b1253b11b..2d6998d24 100644 --- a/externals/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/externals/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -30,8 +30,12 @@ Block::Block(Block&&) = default; Block& Block::operator=(Block&&) = default; -void Block::AppendNewInst(Opcode opcode, std::initializer_list args) { - PrependNewInst(end(), opcode, args); +Block::iterator Block::AppendNewInst(Opcode opcode, std::initializer_list args) { + return PrependNewInst(end(), opcode, args); +} + +Block::iterator Block::AppendNewInst(Inst&& inst) { + return PrependNewInst(end(), std::move(inst)); } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) { @@ -46,6 +50,12 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, s return instructions.insert_before(insertion_point, inst); } +Block::iterator Block::PrependNewInst(iterator insertion_point, Inst&& inst) { + IR::Inst* new_inst = new (instruction_alloc_pool->Alloc()) IR::Inst(std::move(inst)); + + return instructions.insert_before(insertion_point, new_inst); +} + LocationDescriptor Block::Location() const { return location; } @@ -121,8 +131,141 @@ const size_t& Block::CycleCount() const { } void Block::Serialize(std::vector& fres) const { + ASSERT(!empty()); + + fres.push_back(0xa91e); + + fres.push_back(size()); for (const auto& inst : *this) inst.Serialize(*this, fres); + + SerializeTerminal(GetTerminal(), fres); + EndLocation().Serialize(fres); +} + +void Block::Deserialize(std::vector::iterator& it) { + ASSERT(empty()); + + const bool magic_ok = *(it++) == 0xa91e; + ASSERT_MSG(magic_ok, "Bad IR block magic"); + + const auto inst_count = *(it++); + ASSERT(inst_count > 0); + std::vector insts; + for (unsigned idx = 0; idx != inst_count; ++idx) { + auto inst = Inst::Deserialize(insts, it); + Inst* ptr = &*AppendNewInst(std::move(inst)); + insts.push_back(ptr); + } + + SetTerminal(DeserializeTerminal(it)); + SetEndLocation(LocationDescriptor::Deserialize(it)); +} + +void Block::SerializeTerminal(const Term::Terminal& term, std::vector& fres) { + fres.push_back(0xa91f); + + struct Visitor : boost::static_visitor { + std::vector& fres; + + Visitor(std::vector& fres) : fres(fres) {} + + void operator()(const Term::Invalid&) const { + fres.push_back(0); + } + void operator()(const Term::Interpret& interp) const { + fres.push_back(1); + interp.next.Serialize(fres); + ASSERT(interp.num_instructions <= 0xffff); + fres.push_back(static_cast(interp.num_instructions)); + } + void operator()(const Term::ReturnToDispatch&) const { + fres.push_back(2); + } + void operator()(const Term::LinkBlock& link_block) const { + fres.push_back(3); + link_block.next.Serialize(fres); + } + void operator()(const Term::LinkBlockFast& link_block_fast) const { + fres.push_back(4); + link_block_fast.next.Serialize(fres); + } + void operator()(const Term::PopRSBHint&) const { + fres.push_back(5); + } + void operator()(const Term::FastDispatchHint&) const { + fres.push_back(6); + } + void operator()(const Term::If& if_) const { + fres.push_back(7); + fres.push_back(static_cast(if_.if_)); + SerializeTerminal(if_.then_, fres); + SerializeTerminal(if_.else_, fres); + } + void operator()(const Term::CheckBit& check_bit) const { + fres.push_back(8); + SerializeTerminal(check_bit.then_, fres); + SerializeTerminal(check_bit.else_, fres); + } + void operator()(const Term::CheckHalt& check_bit) const { + fres.push_back(9); + SerializeTerminal(check_bit.else_, fres); + } + } visitor{fres}; + + boost::apply_visitor(visitor, term); +} + +Term::Terminal Block::DeserializeTerminal(std::vector::iterator& it) { + const bool magic_ok = *(it++) == 0xa91f; + ASSERT_MSG(magic_ok, "Bad IR block magic"); + + Term::Terminal fres; + + const auto term_idx = *(it++); + switch (term_idx) { + case 0: { + fres = Term::Invalid(); + } break; + case 1: { + Term::Interpret interp(LocationDescriptor::Deserialize(it)); + interp.num_instructions = *(it++); + fres = std::move(interp); + } break; + case 2: { + fres = Term::ReturnToDispatch(); + } break; + case 3: { + fres = Term::LinkBlock(LocationDescriptor::Deserialize(it)); + } break; + case 4: { + fres = Term::LinkBlockFast(LocationDescriptor::Deserialize(it)); + } break; + case 5: { + fres = Term::PopRSBHint(); + } break; + case 6: { + fres = Term::FastDispatchHint(); + } break; + case 7: { + const auto cond = static_cast(*(it++)); + Term::Terminal then = DeserializeTerminal(it); + Term::Terminal else_ = DeserializeTerminal(it); + fres = Term::If(cond, std::move(then), std::move(else_)); + } break; + case 8: { + Term::Terminal then = DeserializeTerminal(it); + Term::Terminal else_ = DeserializeTerminal(it); + fres = Term::CheckBit(std::move(then), std::move(else_)); + } break; + case 9: { + Term::Terminal else_ = DeserializeTerminal(it); + fres = Term::CheckHalt(std::move(else_)); + } break; + default: ASSERT_FALSE("Invalid terminal type index"); + } + + return fres; } static std::string TerminalToString(const Terminal& terminal_variant) { diff --git a/externals/dynarmic/src/dynarmic/ir/basic_block.h b/externals/dynarmic/src/dynarmic/ir/basic_block.h index e01d4ca56..247c5f584 100644 --- a/externals/dynarmic/src/dynarmic/ir/basic_block.h +++ b/externals/dynarmic/src/dynarmic/ir/basic_block.h @@ -83,8 +83,10 @@ public: * * @param op Opcode representing the instruction to add. * @param args A sequence of Value instances used as arguments for the instruction. + * @returns Iterator to the newly created instruction. */ - void AppendNewInst(Opcode op, std::initializer_list args); + Block::iterator AppendNewInst(Opcode op, std::initializer_list args); + Block::iterator AppendNewInst(Inst&& inst); /** * Prepends a new instruction to this basic block before the insertion point, @@ -96,6 +98,7 @@ public: * @returns Iterator to the newly created instruction. */ iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args); + iterator PrependNewInst(iterator insertion_point, Inst&& inst); /// Gets the starting location for this basic block. LocationDescriptor Location() const; @@ -141,8 +144,12 @@ public: const size_t& CycleCount() const; void Serialize(std::vector&) const; + void Deserialize(std::vector::iterator&); private: + static void SerializeTerminal(const Terminal&, std::vector&); + static Terminal DeserializeTerminal(std::vector::iterator&); + /// Description of the starting location of this block LocationDescriptor location; /// Description of the end location of this block diff --git a/externals/dynarmic/src/dynarmic/ir/location_descriptor.cpp b/externals/dynarmic/src/dynarmic/ir/location_descriptor.cpp index e7e640561..840587940 100644 --- a/externals/dynarmic/src/dynarmic/ir/location_descriptor.cpp +++ b/externals/dynarmic/src/dynarmic/ir/location_descriptor.cpp @@ -13,4 +13,23 @@ std::string ToString(const LocationDescriptor& descriptor) { return fmt::format("{{{:016x}}}", descriptor.Value()); } +void LocationDescriptor::Serialize(std::vector& fres) const { + u64 work_value = value; + fres.push_back(static_cast(work_value)); + work_value >>= 16; + fres.push_back(static_cast(work_value)); + work_value >>= 16; + fres.push_back(static_cast(work_value)); + work_value >>= 16; + fres.push_back(static_cast(work_value)); +} + +LocationDescriptor LocationDescriptor::Deserialize(std::vector::iterator& it) { + u64 value = *(it++); + value |= static_cast(*(it++)) << 16; + value |= static_cast(*(it++)) << 32; + value |= static_cast(*(it++)) << 48; + return LocationDescriptor(value); +} + } // namespace Dynarmic::IR diff --git a/externals/dynarmic/src/dynarmic/ir/location_descriptor.h b/externals/dynarmic/src/dynarmic/ir/location_descriptor.h index 48e5e32bb..2db660980 100644 --- a/externals/dynarmic/src/dynarmic/ir/location_descriptor.h +++ b/externals/dynarmic/src/dynarmic/ir/location_descriptor.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -26,6 +27,9 @@ public: return !operator==(o); } + void Serialize(std::vector& fres) const; + static LocationDescriptor Deserialize(std::vector::iterator&); + u64 Value() const { return value; } private: diff --git a/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp b/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp index ca16fbeff..3fd16efa0 100644 --- a/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp +++ b/externals/dynarmic/src/dynarmic/ir/microinstruction.cpp @@ -653,12 +653,26 @@ void Inst::SetArg(size_t index, Value value) { } void Inst::Serialize(const Block& block, std::vector& fres) const { + fres.push_back(0xa91d); + fres.push_back(static_cast(GetOpcode())); fres.push_back(NumArgs()); for (unsigned idx = 0; idx != NumArgs(); idx++) GetArg(idx).Serialize(block, fres); } +Inst Inst::Deserialize(const std::vector& insts, std::vector::iterator& it) { + const bool magic_ok = *(it++) == 0xa91d; + ASSERT_MSG(magic_ok, "Bad IR instruction magic"); + + Inst fres(static_cast(*(it++))); + const auto num_args = *(it++); + for (unsigned idx = 0; idx != num_args; idx++) + fres.SetArg(idx, Value::Deserialize(insts, it)); + + return fres; +} + void Inst::Invalidate() { ClearArgs(); op = Opcode::Void; diff --git a/externals/dynarmic/src/dynarmic/ir/microinstruction.h b/externals/dynarmic/src/dynarmic/ir/microinstruction.h index 0e59247f0..55daf9a9a 100644 --- a/externals/dynarmic/src/dynarmic/ir/microinstruction.h +++ b/externals/dynarmic/src/dynarmic/ir/microinstruction.h @@ -25,8 +25,17 @@ constexpr size_t max_arg_count = 4; */ class Inst final : public mcl::intrusive_list_node { public: - explicit Inst(Opcode op) + Inst(Opcode op) : op(op) {} + Inst(Inst&& o) + : op(o.op), use_count(o.use_count), name(o.name), args(o.args), next_pseudoop(o.next_pseudoop) { + o.use_count = 0; + o.name = 0; + std::fill(o.args.begin(), o.args.end(), Value()); + o.next_pseudoop = nullptr; + } + Inst(const Inst& o) + : op(o.op), use_count(o.use_count), name(o.name), args(o.args), next_pseudoop(o.next_pseudoop) {} /// Determines whether or not this instruction performs an arithmetic shift. bool IsArithmeticShift() const; @@ -137,6 +146,7 @@ public: void SetArg(size_t index, Value value); void Serialize(const Block&, std::vector&) const; + static Inst Deserialize(const std::vector&, std::vector::iterator&); void Invalidate(); void ClearArgs(); diff --git a/externals/dynarmic/src/dynarmic/ir/value.cpp b/externals/dynarmic/src/dynarmic/ir/value.cpp index b3699d969..c4971fc5d 100644 --- a/externals/dynarmic/src/dynarmic/ir/value.cpp +++ b/externals/dynarmic/src/dynarmic/ir/value.cpp @@ -198,21 +198,46 @@ AccType Value::GetAccType() const { } void Value::Serialize(const Block& block, std::vector& fres) const { + fres.push_back(0xa91c); + fres.push_back(static_cast(type)); if (type != Type::Opaque) { for (unsigned it = 0; it != sizeof(inner.raw)/sizeof(*inner.raw); it++) fres.push_back(inner.raw[it]); - } else { - unsigned it = 0; - for (const auto& instr : block) { - if (&instr == inner.inst) { - fres.push_back(it); - return; - } - ++it; - } - ASSERT_MSG(false, "Instruction index not found"); + return; } + + unsigned it = 0; + for (const auto& instr : block) { + if (&instr == inner.inst) { + fres.push_back(it); + return; + } + ++it; + } + + ASSERT_FALSE("Instruction index not found"); + UNREACHABLE(); +} + +Value Value::Deserialize(const std::vector& insts, std::vector::iterator& it) { + const bool magic_ok = *(it++) == 0xa91c; + ASSERT_MSG(magic_ok, "Bad IR value magic"); + + Value fres; + fres.type = static_cast(*(it++)); + + if (fres.type != Type::Opaque) { + for (unsigned idx = 0; idx != sizeof(inner.raw)/sizeof(*inner.raw); idx++) + fres.inner.raw[idx] = *(it++); + return fres; + } + + const auto idx = *(it++); + ASSERT(idx < insts.size()); + + fres.inner.inst = insts[idx]; + return fres; } s64 Value::GetImmediateAsS64() const { diff --git a/externals/dynarmic/src/dynarmic/ir/value.h b/externals/dynarmic/src/dynarmic/ir/value.h index a1a2e5f2a..f20c4e51a 100644 --- a/externals/dynarmic/src/dynarmic/ir/value.h +++ b/externals/dynarmic/src/dynarmic/ir/value.h @@ -77,6 +77,7 @@ public: AccType GetAccType() const; void Serialize(const Block&, std::vector&) const; + static Value Deserialize(const std::vector&, std::vector::iterator&); /** * Retrieves the immediate of a Value instance as a signed 64-bit value. diff --git a/src/common/fs/fs_paths.h b/src/common/fs/fs_paths.h index bcf447089..c96a87b8c 100644 --- a/src/common/fs/fs_paths.h +++ b/src/common/fs/fs_paths.h @@ -23,6 +23,7 @@ #define SCREENSHOTS_DIR "screenshots" #define SDMC_DIR "sdmc" #define SHADER_DIR "shader" +#define RECOMPILER_DIR "recompiler" #define TAS_DIR "tas" #define ICONS_DIR "icons" diff --git a/src/common/fs/path_util.cpp b/src/common/fs/path_util.cpp index 4f69db6f5..1df37c231 100644 --- a/src/common/fs/path_util.cpp +++ b/src/common/fs/path_util.cpp @@ -129,6 +129,7 @@ public: GenerateYuzuPath(YuzuPath::ScreenshotsDir, yuzu_path / SCREENSHOTS_DIR); GenerateYuzuPath(YuzuPath::SDMCDir, yuzu_path / SDMC_DIR); GenerateYuzuPath(YuzuPath::ShaderDir, yuzu_path / SHADER_DIR); + GenerateYuzuPath(YuzuPath::RecompilerDir, yuzu_path / RECOMPILER_DIR); GenerateYuzuPath(YuzuPath::TASDir, yuzu_path / TAS_DIR); GenerateYuzuPath(YuzuPath::IconsDir, yuzu_path / ICONS_DIR); } diff --git a/src/common/fs/path_util.h b/src/common/fs/path_util.h index 59301e7ed..d4681ef76 100644 --- a/src/common/fs/path_util.h +++ b/src/common/fs/path_util.h @@ -25,6 +25,7 @@ enum class YuzuPath { ScreenshotsDir, // Where yuzu screenshots are stored. SDMCDir, // Where the emulated SDMC is stored. ShaderDir, // Where shaders are stored. + RecompilerDir, // Where cached recompiler IR is stored. TASDir, // Where TAS scripts are stored. IconsDir, // Where Icons for Windows shortcuts are stored. }; diff --git a/src/common/settings.h b/src/common/settings.h index 2ea310e13..db2489abd 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -212,6 +212,8 @@ struct Values { &use_speed_limit}; SwitchableSetting sync_core_speed{linkage, false, "sync_core_speed", Category::Core, Specialization::Default}; + SwitchableSetting ir_cache{linkage, false, "ir_cache", Category::Core, + Specialization::Default}; // Cpu SwitchableSetting cpu_backend{linkage, diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 3778cc91d..6e6c3514f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/settings.h" +#include "common/fs/path_util.h" #include "core/arm/dynarmic/arm_dynarmic.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/arm/dynarmic/dynarmic_exclusive_monitor.h" @@ -261,6 +262,11 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa // Unpredictable instructions config.define_unpredictable_behaviour = true; + // IR cache + if (Settings::values.ir_cache) { + config.ir_cache_path = Common::FS::GetYuzuPath(Common::FS::YuzuPath::RecompilerDir); + } + // Timing config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index 56ee9f000..d0327a201 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -76,9 +76,11 @@ std::unique_ptr InitializeTranslations(QWidget* parent) { tr("Synchronizes CPU core speed to game's maximum rendering speed, which can be useful to " "increase FPS without increasing the actual speed of the game (animations, physics, " "etc.)\n" - "It's up to each game if it plays well with this or not. Most games (specially original " - "ones) " + "It's up to each game if it plays well with this or not. Most games (specially originals) " "simply ignore this.\nThis can help play the game stutter-free at a lower framerate.")); + INSERT(Settings, ir_cache, tr("Recompiler cache"), + tr("Caches optimized IR from recompiler. Improves performance\nin some scenarios at the " + "cost of increased disk activity and space consumption.")); // Cpu INSERT(Settings, cpu_accuracy, tr("Accuracy:"),