forked from chinosk/gkms-local
				
			Split Generic Text (#1)
* split generic text * Add splited table, change split flag to `__split__` * add split flag check * fix target
This commit is contained in:
		
							parent
							
								
									6ddf4212d4
								
							
						
					
					
						commit
						0e1ad6959b
					
				| 
						 | 
				
			
			@ -11,6 +11,9 @@
 | 
			
		|||
#include <thread>
 | 
			
		||||
#include <regex>
 | 
			
		||||
#include <ranges>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <cctype>
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include "BaseDefine.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -18,16 +21,63 @@ namespace GakumasLocal::Local {
 | 
			
		|||
    std::unordered_map<std::string, std::string> i18nData{};
 | 
			
		||||
    std::unordered_map<std::string, std::string> i18nDumpData{};
 | 
			
		||||
    std::unordered_map<std::string, std::string> genericText{};
 | 
			
		||||
    std::unordered_map<std::string, std::string> genericSplitText{};
 | 
			
		||||
    std::vector<std::string> genericTextDumpData{};
 | 
			
		||||
    std::vector<std::string> genericSplittedDumpData{};
 | 
			
		||||
    std::vector<std::string> genericOrigTextDumpData{};
 | 
			
		||||
    std::unordered_set<std::string> translatedText{};
 | 
			
		||||
    int genericDumpFileIndex = 0;
 | 
			
		||||
    const std::string splitTextPrefix = "[__split__]";
 | 
			
		||||
 | 
			
		||||
    std::filesystem::path GetBasePath() {
 | 
			
		||||
        return Plugin::GetInstance().GetHookInstaller()->localizationFilesDir;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
     * Returns a copy of `str` without its leading and trailing whitespace.
     *
     * Whitespace is whatever std::isspace reports for the current C locale.
     * An empty or all-whitespace input yields an empty string.
     */
    std::string trim(const std::string& str) {
        // std::isspace is undefined for negative arguments, so each byte is taken
        // as unsigned char; otherwise UTF-8 lead/continuation bytes are passed as
        // negative values on platforms where plain char is signed.
        const auto is_not_space = [](unsigned char ch) { return std::isspace(ch) == 0; };

        const auto start = std::find_if(str.begin(), str.end(), is_not_space);
        // .base() of the reverse iterator points one past the last non-space character.
        const auto end = std::find_if(str.rbegin(), str.rend(), is_not_space).base();

        if (start < end) {
            return std::string(start, end);
        }
        return "";
    }
 | 
			
		||||
 | 
			
		||||
    /**
     * Looks up `key` in `searchMap` while ignoring the key's surrounding whitespace.
     *
     * The key is trimmed before the lookup; on a hit the mapped value is returned
     * with the key's original leading and trailing whitespace re-attached.
     *
     * @param key        Text to look up, possibly padded with whitespace.
     * @param searchMap  Map from trimmed source text to replacement text.
     * @return The padded replacement, or an empty string when the trimmed key is
     *         not present in `searchMap`.
     */
    std::string findInMapIgnoreSpace(const std::string& key, const std::unordered_map<std::string, std::string>& searchMap) {
        // Take bytes as unsigned char: std::isspace is undefined for negative values.
        const auto is_space = [](unsigned char ch) { return std::isspace(ch) != 0; };

        const auto front = std::find_if_not(key.begin(), key.end(), is_space);
        auto back = std::find_if_not(key.rbegin(), key.rend(), is_space).base();
        if (back < front) {
            // All-whitespace key: treat the whole key as prefix so the padding is
            // not emitted twice (once as prefix and once as suffix).
            back = front;
        }

        if (const auto it = searchMap.find(std::string(front, back)); it != searchMap.end()) {
            std::string result(key.begin(), front);
            result += it->second;
            result.append(back, key.end());
            return result;
        }
        return "";
    }
 | 
			
		||||
 | 
			
		||||
    // Selects which pending-dump list a string is queued on. The numeric values
    // matter: DumpGenericText converts them to an index.
    enum class DumpStrStat : int {
        DEFAULT = 0,          // queued on genericTextDumpData
        SPLITTABLE_ORIG = 1,  // queued on genericOrigTextDumpData
        SPLITTED = 2          // queued on genericSplittedDumpData
    };
 | 
			
		||||
 | 
			
		||||
    // Outcome reported by GetSplitTagsTranslationFull.
    enum class SplitTagsTranslationStat {
        NO_TRANS = 0,           // segments were found but none had a translation
        PART_TRANS = 1,         // some segments were translated, some were not
        FULL_TRANS = 2,         // every segment was translated
        NO_SPLIT = 3,           // no segment boundary was hit; only trailing text was collected
        NO_SPLIT_AND_EMPTY = 4  // no segment boundary was hit and nothing was collected
    };
 | 
			
		||||
 | 
			
		||||
    void LoadJsonDataToMap(const std::filesystem::path& filePath, std::unordered_map<std::string, std::string>& dict,
 | 
			
		||||
                           const bool insertToTranslated = false, const bool needClearDict = true) {
 | 
			
		||||
                           const bool insertToTranslated = false, const bool needClearDict = true,
 | 
			
		||||
                           const bool needCheckSplitPrefix = false) {
 | 
			
		||||
        if (!exists(filePath)) return;
 | 
			
		||||
        try {
 | 
			
		||||
            if (needClearDict) {
 | 
			
		||||
| 
						 | 
				
			
			@ -44,8 +94,16 @@ namespace GakumasLocal::Local {
 | 
			
		|||
            for (auto& i : fileData.items()) {
 | 
			
		||||
                const auto& key = i.key();
 | 
			
		||||
                const std::string value = i.value();
 | 
			
		||||
                if (insertToTranslated) translatedText.emplace(value);
 | 
			
		||||
                if (needCheckSplitPrefix && key.starts_with(splitTextPrefix) && value.starts_with(splitTextPrefix)) {
 | 
			
		||||
                    static const auto splitTextPrefixLength = splitTextPrefix.size();
 | 
			
		||||
                    const auto splitValue = value.substr(splitTextPrefixLength);
 | 
			
		||||
                    genericSplitText[key.substr(splitTextPrefixLength)] = splitValue;
 | 
			
		||||
                    if (insertToTranslated) translatedText.emplace(splitValue);
 | 
			
		||||
                }
 | 
			
		||||
                else {
 | 
			
		||||
                    dict[key] = value;
 | 
			
		||||
                    if (insertToTranslated) translatedText.emplace(value);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        catch (std::exception& e) {
 | 
			
		||||
| 
						 | 
				
			
			@ -84,7 +142,7 @@ namespace GakumasLocal::Local {
 | 
			
		|||
    }
 | 
			
		||||
 | 
			
		||||
    void DumpVectorDataToJson(const std::filesystem::path& dumpBasePath, const std::filesystem::path& fileName,
 | 
			
		||||
                           const std::vector<std::string>& vec) {
 | 
			
		||||
                           const std::vector<std::string>& vec, const std::string& prefix = "") {
 | 
			
		||||
        const auto dumpFilePath = dumpBasePath / fileName;
 | 
			
		||||
        try {
 | 
			
		||||
            if (!is_directory(dumpBasePath)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -101,8 +159,13 @@ namespace GakumasLocal::Local {
 | 
			
		|||
            dumpLrcFile.close();
 | 
			
		||||
            auto fileData = nlohmann::ordered_json::parse(fileContent);
 | 
			
		||||
            for (const auto& i : vec) {
 | 
			
		||||
                if (!prefix.empty()) {
 | 
			
		||||
                    fileData[prefix + i] = prefix + i;
 | 
			
		||||
                }
 | 
			
		||||
                else {
 | 
			
		||||
                    fileData[i] = i;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            const auto newStr = fileData.dump(4, 32, false);
 | 
			
		||||
            std::ofstream dumpWriteLrcFile(dumpFilePath, std::ofstream::out);
 | 
			
		||||
            dumpWriteLrcFile << newStr.c_str();
 | 
			
		||||
| 
						 | 
				
			
			@ -199,9 +262,91 @@ namespace GakumasLocal::Local {
 | 
			
		|||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
     * Cuts `origTextIn` into segments and translates each one through genericSplitText.
     *
     * A segment ends at any character in `splitFlags`, at the start of a `<...>`
     * tag, or at the closing `>`; characters inside a tag are never collected.
     * Each segment is looked up with findInMapIgnoreSpace. Hits are substituted
     * into `*newText` via ReplaceString; misses are appended (trimmed) to
     * `unTransResultRet`.
     *
     * @param origTextIn        UTF-8 source text.
     * @param newText           Receives a copy of `origTextIn` with translated
     *                          segments replaced. Left untouched when NO_SPLIT or
     *                          NO_SPLIT_AND_EMPTY is returned. Must not be null.
     * @param unTransResultRet  Receives the trimmed segments that had no translation.
     * @return NO_SPLIT_AND_EMPTY / NO_SPLIT when no segment was completed inside
     *         the scan loop (without / with trailing collected text); otherwise
     *         FULL_TRANS, PART_TRANS or NO_TRANS depending on how many segments
     *         were found in genericSplitText.
     */
    SplitTagsTranslationStat GetSplitTagsTranslationFull(const std::string& origTextIn, std::string* newText, std::vector<std::string>& unTransResultRet) {
        // NOTE(review): '+', '-', '%' and ':' each appear twice below; presumably one
        // of each pair was meant to be the full-width form - confirm against upstream.
        static const std::unordered_set<char16_t> splitFlags = {u'0', u'1', u'2', u'3', u'4', u'5',
                                                                u'6', u'7', u'8', u'9', u'+', u'+',
                                                                u'-', u'-', u'%', u'%', u'【', u'】',
                                                                u'.', u':', u':', u'×'};

        const auto origText = Misc::ToUTF16(origTextIn);
        std::vector<std::string> waitingReplaceTexts{};
        std::u16string currentWaitingReplaceText;

        // Moves the segment collected so far (if any) into waitingReplaceTexts.
        // A lambda is used instead of a macro so nothing leaks past this function.
        const auto flushCurrentSegment = [&waitingReplaceTexts, &currentWaitingReplaceText]() {
            if (!currentWaitingReplaceText.empty()) {
                waitingReplaceTexts.push_back(Misc::ToUTF8(currentWaitingReplaceText));
                currentWaitingReplaceText.clear();
            }
        };

        bool isInTag = false;
        for (const char16_t currChar : origText) {
            if (currChar == u'<') {
                isInTag = true;
            }
            if (currChar == u'>') {
                isInTag = false;
                flushCurrentSegment();
                continue;
            }
            if (isInTag) {
                flushCurrentSegment();
                continue;
            }

            if (splitFlags.contains(currChar)) {
                flushCurrentSegment();
            }
            else {
                currentWaitingReplaceText.push_back(currChar);
            }
        }

        // No boundary was hit while scanning: report that before the trailing text
        // is flushed, so the caller can tell "unsplit text" from "nothing at all".
        if (waitingReplaceTexts.empty()) {
            return currentWaitingReplaceText.empty() ? SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY
                                                     : SplitTagsTranslationStat::NO_SPLIT;
        }
        flushCurrentSegment();

        *newText = origTextIn;
        bool hasTrans = false;
        bool hasNotTrans = false;
        for (const auto& segment : waitingReplaceTexts) {
            const auto searchResult = findInMapIgnoreSpace(segment, genericSplitText);
            if (!searchResult.empty()) {
                ReplaceString(newText, segment, searchResult);
                hasTrans = true;
            }
            else {
                unTransResultRet.emplace_back(trim(segment));
                hasNotTrans = true;
            }
        }

        if (!hasTrans) {
            return SplitTagsTranslationStat::NO_TRANS;
        }
        return hasNotTrans ? SplitTagsTranslationStat::PART_TRANS : SplitTagsTranslationStat::FULL_TRANS;
    }
 | 
			
		||||
 | 
			
		||||
    void LoadData() {
 | 
			
		||||
        static auto localizationFile = GetBasePath() / "local-files" / "localization.json";
 | 
			
		||||
        static auto genericFile = GetBasePath() / "local-files" / "generic.json";
 | 
			
		||||
        static auto genericSplitFile = GetBasePath() / "local-files" / "generic.split.json";
 | 
			
		||||
        static auto genericDir = GetBasePath() / "local-files" / "genericTrans";
 | 
			
		||||
 | 
			
		||||
        if (!std::filesystem::is_regular_file(localizationFile)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -211,13 +356,20 @@ namespace GakumasLocal::Local {
 | 
			
		|||
        LoadJsonDataToMap(localizationFile, i18nData, true);
 | 
			
		||||
        Log::InfoFmt("%ld localization items loaded.", i18nData.size());
 | 
			
		||||
 | 
			
		||||
        LoadJsonDataToMap(genericFile, genericText, true);
 | 
			
		||||
        LoadJsonDataToMap(genericFile, genericText, true, true, true);
 | 
			
		||||
        genericSplitText.clear();
 | 
			
		||||
        LoadJsonDataToMap(genericSplitFile, genericSplitText, true, true, true);
 | 
			
		||||
        if (std::filesystem::exists(genericDir) || std::filesystem::is_directory(genericDir)) {
 | 
			
		||||
            for (const auto& entry : std::filesystem::recursive_directory_iterator(genericDir)) {
 | 
			
		||||
                if (std::filesystem::is_regular_file(entry.path())) {
 | 
			
		||||
                    const auto currFile = entry.path();
 | 
			
		||||
                    const auto& currFile = entry.path();
 | 
			
		||||
                    if (to_lower(currFile.extension().string()) == ".json") {
 | 
			
		||||
                        LoadJsonDataToMap(currFile, genericText, true, false);
 | 
			
		||||
                        if (currFile.filename().string().ends_with(".split.json")) {  // split text file
 | 
			
		||||
                            LoadJsonDataToMap(currFile, genericSplitText, true, false, true);
 | 
			
		||||
                        }
 | 
			
		||||
                        else {
 | 
			
		||||
                            LoadJsonDataToMap(currFile, genericText, true, false, true);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
| 
						 | 
				
			
			@ -285,29 +437,47 @@ namespace GakumasLocal::Local {
 | 
			
		|||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetDumpGenericFileName() {
 | 
			
		||||
    std::string GetDumpGenericFileName(DumpStrStat stat = DumpStrStat::DEFAULT) {
 | 
			
		||||
        if (stat == DumpStrStat::SPLITTABLE_ORIG) {
 | 
			
		||||
            if (genericDumpFileIndex == 0) return "generic_orig.json";
 | 
			
		||||
            return Log::StringFormat("generic_orig_%d.json", genericDumpFileIndex);
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            if (genericDumpFileIndex == 0) return "generic.json";
 | 
			
		||||
            return Log::StringFormat("generic_%d.json", genericDumpFileIndex);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bool inDumpGeneric = false;
 | 
			
		||||
    void DumpGenericText(const std::string& origText) {
 | 
			
		||||
    void DumpGenericText(const std::string& origText, DumpStrStat stat = DumpStrStat::DEFAULT) {
 | 
			
		||||
        if (translatedText.contains(origText)) return;
 | 
			
		||||
 | 
			
		||||
        if (std::find(genericTextDumpData.begin(), genericTextDumpData.end(), origText) != genericTextDumpData.end()) {
 | 
			
		||||
        std::array<std::reference_wrapper<std::vector<std::string>>, 3> targets = {
 | 
			
		||||
                genericTextDumpData,
 | 
			
		||||
                genericOrigTextDumpData,
 | 
			
		||||
                genericSplittedDumpData
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        auto& appendTarget = targets[static_cast<int>(stat)].get();
 | 
			
		||||
 | 
			
		||||
        if (std::find(appendTarget.begin(), appendTarget.end(), origText) != appendTarget.end()) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        if (IsPureStringValue(origText)) return;
 | 
			
		||||
 | 
			
		||||
        genericTextDumpData.push_back(origText);
 | 
			
		||||
        appendTarget.push_back(origText);
 | 
			
		||||
        static auto dumpBasePath = GetBasePath() / "dump-files";
 | 
			
		||||
 | 
			
		||||
        if (inDumpGeneric) return;
 | 
			
		||||
        inDumpGeneric = true;
 | 
			
		||||
        std::thread([](){
 | 
			
		||||
            std::this_thread::sleep_for(std::chrono::seconds(5));
 | 
			
		||||
            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(), genericTextDumpData);
 | 
			
		||||
            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::DEFAULT), genericTextDumpData);
 | 
			
		||||
            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTABLE_ORIG), genericOrigTextDumpData);
 | 
			
		||||
            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTED), genericSplittedDumpData, splitTextPrefix);
 | 
			
		||||
            genericTextDumpData.clear();
 | 
			
		||||
            genericSplittedDumpData.clear();
 | 
			
		||||
            genericOrigTextDumpData.clear();
 | 
			
		||||
            inDumpGeneric = false;
 | 
			
		||||
        }).detach();
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -318,25 +488,50 @@ namespace GakumasLocal::Local {
 | 
			
		|||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        auto ret = false;
 | 
			
		||||
 | 
			
		||||
        std::vector<std::string> unTransResultRet;
 | 
			
		||||
        if (GetSplitTagsTranslation(origText, newStr, unTransResultRet)) {
 | 
			
		||||
        const auto splitTransStat = GetSplitTagsTranslationFull(origText, newStr, unTransResultRet);
 | 
			
		||||
        switch (splitTransStat) {
 | 
			
		||||
            case SplitTagsTranslationStat::FULL_TRANS: {
 | 
			
		||||
                return true;
 | 
			
		||||
            } break;
 | 
			
		||||
 | 
			
		||||
            case SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY: {
 | 
			
		||||
                return false;
 | 
			
		||||
            } break;
 | 
			
		||||
 | 
			
		||||
            case SplitTagsTranslationStat::NO_SPLIT: {
 | 
			
		||||
                ret = false;
 | 
			
		||||
            } break;
 | 
			
		||||
 | 
			
		||||
            case SplitTagsTranslationStat::NO_TRANS: {
 | 
			
		||||
                ret = false;
 | 
			
		||||
            } break;
 | 
			
		||||
 | 
			
		||||
            case SplitTagsTranslationStat::PART_TRANS: {
 | 
			
		||||
                ret = true;
 | 
			
		||||
            } break;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (!Config::dumpText) {
 | 
			
		||||
            return false;
 | 
			
		||||
            return ret;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (unTransResultRet.empty()) {
 | 
			
		||||
        if (unTransResultRet.empty() || (splitTransStat == SplitTagsTranslationStat::NO_SPLIT)) {
 | 
			
		||||
            DumpGenericText(origText);
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            for (const auto& i : unTransResultRet) {
 | 
			
		||||
                DumpGenericText(i);
 | 
			
		||||
                DumpGenericText(i, DumpStrStat::SPLITTED);
 | 
			
		||||
            }
 | 
			
		||||
            // 若未翻译部分长度为1,且未翻译文本等于原文本,则不 dump 到原文本文件
 | 
			
		||||
            //if (unTransResultRet.size() != 1 || unTransResultRet[0] != origText) {
 | 
			
		||||
                DumpGenericText(origText, DumpStrStat::SPLITTABLE_ORIG);
 | 
			
		||||
            //}
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return false;
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string ChangeDumpTextIndex(int changeValue) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1471,6 +1471,25 @@ public:
 | 
			
		|||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
            [[nodiscard]] auto ToWString() const -> std::u16string {
 | 
			
		||||
#if WINDOWS_MODE
 | 
			
		||||
                if (IsBadReadPtr(this, sizeof(String))) return {};
 | 
			
		||||
				if (IsBadReadPtr(m_firstChar, m_stringLength)) return {};
 | 
			
		||||
#endif
 | 
			
		||||
                if (!this) return {};
 | 
			
		||||
                try {
 | 
			
		||||
                    // using convert_typeX = std::codecvt_utf8<wchar_t>;
 | 
			
		||||
                    // std::wstring_convert<convert_typeX> converterX;
 | 
			
		||||
                    // return converterX.to_bytes(m_firstChar);
 | 
			
		||||
                    return {chars};
 | 
			
		||||
                }
 | 
			
		||||
                catch (std::exception& e) {
 | 
			
		||||
                    std::cout << "String Invoke Error\n";
 | 
			
		||||
                    GakumasLocal::Log::ErrorFmt("String Invoke Error: %s", e.what());
 | 
			
		||||
                    return {};
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
			auto operator=(const std::string& newString) const -> String* { return New(newString); }
 | 
			
		||||
 | 
			
		||||
			auto operator==(const std::wstring& newString) const -> bool { return Equals(newString); }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue