forked from chinosk/gkms-local
Split Generic Text (#1)
* Split generic text
* Add split table; change the split flag to `__split__`
* Add split flag check
* Fix target
This commit is contained in:
parent
6ddf4212d4
commit
0e1ad6959b
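Context for the diff below, as a minimal sketch (not part of the commit): generic translation entries opt into split matching by carrying the `[__split__]` flag on both the key and the value, either in `generic.json`, in `generic.split.json`, or in any `*.split.json` file under `genericTrans`. At load time such entries are routed into the new `genericSplitText` table with the flag stripped, presumably so split entries can coexist with plain entries in the same file. The snippet mirrors that routing; the JSON entry strings are hypothetical.

```cpp
// Sketch of the split-flag routing added to LoadJsonDataToMap.
// The JSON content below is hypothetical example data.
#include <iostream>
#include <string>
#include <unordered_map>
#include <nlohmann/json.hpp>

int main() {
    const std::string splitTextPrefix = "[__split__]";
    const auto fileData = nlohmann::ordered_json::parse(R"({
        "Produce": "Produce",
        "[__split__]Vocal": "[__split__]Vocal (translated)"
    })");

    std::unordered_map<std::string, std::string> genericText;      // plain entries
    std::unordered_map<std::string, std::string> genericSplitText; // split entries, flag stripped

    for (auto& item : fileData.items()) {
        const auto& key = item.key();
        const std::string value = item.value();
        if (key.starts_with(splitTextPrefix) && value.starts_with(splitTextPrefix)) {
            genericSplitText[key.substr(splitTextPrefix.size())] = value.substr(splitTextPrefix.size());
        } else {
            genericText[key] = value;
        }
    }

    std::cout << genericText.size() << " plain, "
              << genericSplitText.size() << " split entries\n"; // 1 plain, 1 split
}
```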
@@ -11,6 +11,9 @@
 #include <thread>
 #include <regex>
 #include <ranges>
+#include <string>
+#include <cctype>
+#include <algorithm>
 #include "BaseDefine.h"
 
 
@@ -18,16 +21,63 @@ namespace GakumasLocal::Local {
     std::unordered_map<std::string, std::string> i18nData{};
     std::unordered_map<std::string, std::string> i18nDumpData{};
     std::unordered_map<std::string, std::string> genericText{};
+    std::unordered_map<std::string, std::string> genericSplitText{};
     std::vector<std::string> genericTextDumpData{};
+    std::vector<std::string> genericSplittedDumpData{};
+    std::vector<std::string> genericOrigTextDumpData{};
     std::unordered_set<std::string> translatedText{};
     int genericDumpFileIndex = 0;
+    const std::string splitTextPrefix = "[__split__]";
 
     std::filesystem::path GetBasePath() {
         return Plugin::GetInstance().GetHookInstaller()->localizationFilesDir;
     }
 
+    std::string trim(const std::string& str) {
+        auto is_not_space = [](char ch) { return !std::isspace(ch); };
+        auto start = std::ranges::find_if(str, is_not_space);
+        auto end = std::ranges::find_if(str | std::views::reverse, is_not_space).base();
+
+        if (start < end) {
+            return {start, end};
+        }
+        return "";
+    }
+
+    std::string findInMapIgnoreSpace(const std::string& key, const std::unordered_map<std::string, std::string>& searchMap) {
+        auto is_space = [](char ch) { return std::isspace(ch); };
+        auto front = std::ranges::find_if_not(key, is_space);
+        auto back = std::ranges::find_if_not(key | std::views::reverse, is_space).base();
+
+        std::string prefix(key.begin(), front);
+        std::string suffix(back, key.end());
+
+        std::string trimmedKey = trim(key);
+        if (auto it = searchMap.find(trimmedKey); it != searchMap.end()) {
+            return prefix + it->second + suffix;
+        }
+        else {
+            return "";
+        }
+    }
+
+    enum class DumpStrStat {
+        DEFAULT = 0,
+        SPLITTABLE_ORIG = 1,
+        SPLITTED = 2
+    };
+
+    enum class SplitTagsTranslationStat {
+        NO_TRANS,
+        PART_TRANS,
+        FULL_TRANS,
+        NO_SPLIT,
+        NO_SPLIT_AND_EMPTY
+    };
+
     void LoadJsonDataToMap(const std::filesystem::path& filePath, std::unordered_map<std::string, std::string>& dict,
-                           const bool insertToTranslated = false, const bool needClearDict = true) {
+                           const bool insertToTranslated = false, const bool needClearDict = true,
+                           const bool needCheckSplitPrefix = false) {
         if (!exists(filePath)) return;
         try {
             if (needClearDict) {
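The `trim`/`findInMapIgnoreSpace` pair added above is what makes split lookups whitespace-tolerant: the lookup matches on the trimmed key, then reattaches the segment's original leading and trailing whitespace around the translated value. A small self-contained illustration follows; the two helpers are copied from the hunk above and the map contents are hypothetical.

```cpp
// Illustration of the whitespace-preserving lookup; map contents are hypothetical.
#include <algorithm>
#include <cctype>
#include <iostream>
#include <ranges>
#include <string>
#include <unordered_map>

std::string trim(const std::string& str) {
    auto is_not_space = [](char ch) { return !std::isspace(ch); };
    auto start = std::ranges::find_if(str, is_not_space);
    auto end = std::ranges::find_if(str | std::views::reverse, is_not_space).base();
    if (start < end) {
        return {start, end};
    }
    return "";
}

std::string findInMapIgnoreSpace(const std::string& key, const std::unordered_map<std::string, std::string>& searchMap) {
    auto is_space = [](char ch) { return std::isspace(ch); };
    auto front = std::ranges::find_if_not(key, is_space);
    auto back = std::ranges::find_if_not(key | std::views::reverse, is_space).base();
    std::string prefix(key.begin(), front);   // leading whitespace of the segment
    std::string suffix(back, key.end());      // trailing whitespace of the segment
    if (auto it = searchMap.find(trim(key)); it != searchMap.end()) {
        return prefix + it->second + suffix;  // translation keeps the surrounding spaces
    }
    return "";
}

int main() {
    const std::unordered_map<std::string, std::string> splitTable = {{"Vocal", "VOCAL-translated"}};
    std::cout << '[' << findInMapIgnoreSpace(" Vocal ", splitTable) << "]\n"; // [ VOCAL-translated ]
    std::cout << '[' << findInMapIgnoreSpace("Dance", splitTable)   << "]\n"; // [] -> treated as untranslated
}
```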
@@ -44,8 +94,16 @@ namespace GakumasLocal::Local {
             for (auto& i : fileData.items()) {
                 const auto& key = i.key();
                 const std::string value = i.value();
-                if (insertToTranslated) translatedText.emplace(value);
-                dict[key] = value;
+                if (needCheckSplitPrefix && key.starts_with(splitTextPrefix) && value.starts_with(splitTextPrefix)) {
+                    static const auto splitTextPrefixLength = splitTextPrefix.size();
+                    const auto splitValue = value.substr(splitTextPrefixLength);
+                    genericSplitText[key.substr(splitTextPrefixLength)] = splitValue;
+                    if (insertToTranslated) translatedText.emplace(splitValue);
+                }
+                else {
+                    dict[key] = value;
+                    if (insertToTranslated) translatedText.emplace(value);
+                }
             }
         }
         catch (std::exception& e) {
@@ -84,7 +142,7 @@ namespace GakumasLocal::Local {
     }
 
     void DumpVectorDataToJson(const std::filesystem::path& dumpBasePath, const std::filesystem::path& fileName,
-                              const std::vector<std::string>& vec) {
+                              const std::vector<std::string>& vec, const std::string& prefix = "") {
         const auto dumpFilePath = dumpBasePath / fileName;
         try {
             if (!is_directory(dumpBasePath)) {
@@ -101,7 +159,12 @@ namespace GakumasLocal::Local {
             dumpLrcFile.close();
             auto fileData = nlohmann::ordered_json::parse(fileContent);
             for (const auto& i : vec) {
-                fileData[i] = i;
+                if (!prefix.empty()) {
+                    fileData[prefix + i] = prefix + i;
+                }
+                else {
+                    fileData[i] = i;
+                }
             }
             const auto newStr = fileData.dump(4, 32, false);
             std::ofstream dumpWriteLrcFile(dumpFilePath, std::ofstream::out);
@@ -199,9 +262,91 @@ namespace GakumasLocal::Local {
         return ret;
     }
 
+    SplitTagsTranslationStat GetSplitTagsTranslationFull(const std::string& origTextIn, std::string* newText, std::vector<std::string>& unTransResultRet) {
+        // static const std::u16string splitFlags = u"0123456789++--%%【】.";
+        static const std::unordered_set<char16_t> splitFlags = {u'0', u'1', u'2', u'3', u'4', u'5',
+                                                                u'6', u'7', u'8', u'9', u'+', u'+',
+                                                                u'-', u'-', u'%', u'%', u'【', u'】',
+                                                                u'.', u':', u':', u'×'};
+
+        const auto origText = Misc::ToUTF16(origTextIn);
+        bool isInTag = false;
+        std::vector<std::string> waitingReplaceTexts{};
+
+        std::u16string currentWaitingReplaceText;
+
+#define checkCurrentWaitingReplaceTextAndClear() \
+        if (!currentWaitingReplaceText.empty()) { \
+            waitingReplaceTexts.push_back(Misc::ToUTF8(currentWaitingReplaceText)); \
+            currentWaitingReplaceText.clear(); }
+
+        for (char16_t currChar : origText) {
+            if (currChar == u'<') {
+                isInTag = true;
+            }
+            if (currChar == u'>') {
+                isInTag = false;
+                checkCurrentWaitingReplaceTextAndClear()
+                continue;
+            }
+            if (isInTag) {
+                checkCurrentWaitingReplaceTextAndClear()
+                continue;
+            }
+
+            if (!splitFlags.contains(currChar)) {
+                currentWaitingReplaceText.push_back(currChar);
+            }
+            else {
+                checkCurrentWaitingReplaceTextAndClear()
+            }
+        }
+        if (waitingReplaceTexts.empty()) {
+            if (currentWaitingReplaceText.empty()) {
+                return SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY;
+            }
+            else {
+                return SplitTagsTranslationStat::NO_SPLIT;
+            }
+        }
+        checkCurrentWaitingReplaceTextAndClear()
+
+        *newText = origTextIn;
+        SplitTagsTranslationStat ret;
+        bool hasTrans = false;
+        bool hasNotTrans = false;
+        if (!waitingReplaceTexts.empty()) {
+            for (const auto& i : waitingReplaceTexts) {
+                const auto searchResult = findInMapIgnoreSpace(i, genericSplitText);
+                if (!searchResult.empty()) {
+                    ReplaceString(newText, i, searchResult);
+                    hasTrans = true;
+                }
+                else {
+                    unTransResultRet.emplace_back(trim(i));
+                    hasNotTrans = true;
+                }
+            }
+            if (hasTrans && hasNotTrans) {
+                ret = SplitTagsTranslationStat::PART_TRANS;
+            }
+            else if (hasTrans && !hasNotTrans) {
+                ret = SplitTagsTranslationStat::FULL_TRANS;
+            }
+            else {
+                ret = SplitTagsTranslationStat::NO_TRANS;
+            }
+        }
+        else {
+            ret = SplitTagsTranslationStat::NO_TRANS;
+        }
+        return ret;
+    }
+
     void LoadData() {
         static auto localizationFile = GetBasePath() / "local-files" / "localization.json";
         static auto genericFile = GetBasePath() / "local-files" / "generic.json";
+        static auto genericSplitFile = GetBasePath() / "local-files" / "generic.split.json";
         static auto genericDir = GetBasePath() / "local-files" / "genericTrans";
 
         if (!std::filesystem::is_regular_file(localizationFile)) {
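How `GetSplitTagsTranslationFull` decides what to look up: rich-text tags (`<...>`) and the characters in `splitFlags` (digits, plus, minus, percent, 【】, '.', ':', ×) act as cut points, and only the plain-text fragments between them are checked against `genericSplitText`. Below is a stripped-down, self-contained version of just that segmentation loop, with a hypothetical input string; it is a sketch, not the project function itself. Fragments that miss the table are collected (trimmed) into `unTransResultRet`, which later drives the PART_TRANS/NO_TRANS result and the dump logic.

```cpp
// Simplified re-expression of the splitting loop: skip <...> tags, cut at
// splitFlags characters, keep the remaining text fragments. Input is hypothetical.
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
    static const std::unordered_set<char16_t> splitFlags = {
        u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9',
        u'+', u'-', u'%', u'【', u'】', u'.', u':', u'×'};

    const std::u16string origText = u"<b>Vocal</b>+Dance:Lv.10";
    std::vector<std::u16string> segments;
    std::u16string current;
    bool isInTag = false;

    auto flush = [&] {
        if (!current.empty()) { segments.push_back(current); current.clear(); }
    };

    for (char16_t c : origText) {
        if (c == u'<') isInTag = true;
        if (c == u'>') { isInTag = false; flush(); continue; }
        if (isInTag)   { flush(); continue; }
        if (!splitFlags.contains(c)) current.push_back(c);
        else flush();
    }
    flush();

    std::cout << segments.size() << " segments\n"; // 3: "Vocal", "Dance", "Lv"
}
```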
@@ -211,13 +356,20 @@ namespace GakumasLocal::Local {
         LoadJsonDataToMap(localizationFile, i18nData, true);
         Log::InfoFmt("%ld localization items loaded.", i18nData.size());
 
-        LoadJsonDataToMap(genericFile, genericText, true);
+        LoadJsonDataToMap(genericFile, genericText, true, true, true);
+        genericSplitText.clear();
+        LoadJsonDataToMap(genericSplitFile, genericSplitText, true, true, true);
         if (std::filesystem::exists(genericDir) || std::filesystem::is_directory(genericDir)) {
             for (const auto& entry : std::filesystem::recursive_directory_iterator(genericDir)) {
                 if (std::filesystem::is_regular_file(entry.path())) {
-                    const auto currFile = entry.path();
+                    const auto& currFile = entry.path();
                     if (to_lower(currFile.extension().string()) == ".json") {
-                        LoadJsonDataToMap(currFile, genericText, true, false);
+                        if (currFile.filename().string().ends_with(".split.json")) { // split text file
+                            LoadJsonDataToMap(currFile, genericSplitText, true, false, true);
+                        }
+                        else {
+                            LoadJsonDataToMap(currFile, genericText, true, false, true);
+                        }
                     }
                 }
             }
@@ -285,29 +437,47 @@ namespace GakumasLocal::Local {
         return false;
     }
 
-    std::string GetDumpGenericFileName() {
-        if (genericDumpFileIndex == 0) return "generic.json";
-        return Log::StringFormat("generic_%d.json", genericDumpFileIndex);
+    std::string GetDumpGenericFileName(DumpStrStat stat = DumpStrStat::DEFAULT) {
+        if (stat == DumpStrStat::SPLITTABLE_ORIG) {
+            if (genericDumpFileIndex == 0) return "generic_orig.json";
+            return Log::StringFormat("generic_orig_%d.json", genericDumpFileIndex);
+        }
+        else {
+            if (genericDumpFileIndex == 0) return "generic.json";
+            return Log::StringFormat("generic_%d.json", genericDumpFileIndex);
+        }
     }
 
     bool inDumpGeneric = false;
-    void DumpGenericText(const std::string& origText) {
+    void DumpGenericText(const std::string& origText, DumpStrStat stat = DumpStrStat::DEFAULT) {
         if (translatedText.contains(origText)) return;
 
-        if (std::find(genericTextDumpData.begin(), genericTextDumpData.end(), origText) != genericTextDumpData.end()) {
+        std::array<std::reference_wrapper<std::vector<std::string>>, 3> targets = {
+            genericTextDumpData,
+            genericOrigTextDumpData,
+            genericSplittedDumpData
+        };
+
+        auto& appendTarget = targets[static_cast<int>(stat)].get();
+
+        if (std::find(appendTarget.begin(), appendTarget.end(), origText) != appendTarget.end()) {
             return;
         }
         if (IsPureStringValue(origText)) return;
 
-        genericTextDumpData.push_back(origText);
+        appendTarget.push_back(origText);
         static auto dumpBasePath = GetBasePath() / "dump-files";
 
        if (inDumpGeneric) return;
        inDumpGeneric = true;
        std::thread([](){
            std::this_thread::sleep_for(std::chrono::seconds(5));
-            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(), genericTextDumpData);
+            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::DEFAULT), genericTextDumpData);
+            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTABLE_ORIG), genericOrigTextDumpData);
+            DumpVectorDataToJson(dumpBasePath, GetDumpGenericFileName(DumpStrStat::SPLITTED), genericSplittedDumpData, splitTextPrefix);
            genericTextDumpData.clear();
+            genericSplittedDumpData.clear();
+            genericOrigTextDumpData.clear();
            inDumpGeneric = false;
        }).detach();
    }
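Dump routing after this hunk, as a sketch of my reading of the diff: `DEFAULT` and `SPLITTED` entries land in `generic.json` (or `generic_N.json` once the index is raised), `SPLITTABLE_ORIG` entries in `generic_orig[_N].json`, and the `SPLITTED` vector is written through `DumpVectorDataToJson` with the `[__split__]` prefix so the dumped keys can be translated and loaded straight back into the split table. The helper name below is hypothetical and `genericDumpFileIndex == 0` is assumed.

```cpp
// Sketch mirroring GetDumpGenericFileName with genericDumpFileIndex == 0.
#include <iostream>
#include <string>

enum class DumpStrStat { DEFAULT = 0, SPLITTABLE_ORIG = 1, SPLITTED = 2 };

std::string DumpFileNameFor(DumpStrStat stat) {            // hypothetical helper
    if (stat == DumpStrStat::SPLITTABLE_ORIG) return "generic_orig.json";
    return "generic.json";
}

int main() {
    std::cout << DumpFileNameFor(DumpStrStat::DEFAULT) << '\n';         // generic.json
    std::cout << DumpFileNameFor(DumpStrStat::SPLITTABLE_ORIG) << '\n'; // generic_orig.json
    // SPLITTED shares generic.json, but its entries are written with the "[__split__]" prefix.
    std::cout << DumpFileNameFor(DumpStrStat::SPLITTED) << '\n';        // generic.json
}
```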
@@ -318,25 +488,50 @@ namespace GakumasLocal::Local {
             return true;
         }
 
+        auto ret = false;
+
         std::vector<std::string> unTransResultRet;
-        if (GetSplitTagsTranslation(origText, newStr, unTransResultRet)) {
-            return true;
+        const auto splitTransStat = GetSplitTagsTranslationFull(origText, newStr, unTransResultRet);
+        switch (splitTransStat) {
+            case SplitTagsTranslationStat::FULL_TRANS: {
+                return true;
+            } break;
+
+            case SplitTagsTranslationStat::NO_SPLIT_AND_EMPTY: {
+                return false;
+            } break;
+
+            case SplitTagsTranslationStat::NO_SPLIT: {
+                ret = false;
+            } break;
+
+            case SplitTagsTranslationStat::NO_TRANS: {
+                ret = false;
+            } break;
+
+            case SplitTagsTranslationStat::PART_TRANS: {
+                ret = true;
+            } break;
         }
 
         if (!Config::dumpText) {
-            return false;
+            return ret;
         }
 
-        if (unTransResultRet.empty()) {
+        if (unTransResultRet.empty() || (splitTransStat == SplitTagsTranslationStat::NO_SPLIT)) {
             DumpGenericText(origText);
         }
         else {
             for (const auto& i : unTransResultRet) {
-                DumpGenericText(i);
+                DumpGenericText(i, DumpStrStat::SPLITTED);
             }
+            // If there is only one untranslated part and it equals the original text, do not dump it to the original-text file
+            //if (unTransResultRet.size() != 1 || unTransResultRet[0] != origText) {
+                DumpGenericText(origText, DumpStrStat::SPLITTABLE_ORIG);
+            //}
         }
 
-        return false;
+        return ret;
     }
 
     std::string ChangeDumpTextIndex(int changeValue) {
@@ -1471,6 +1471,25 @@ public:
         }
     }
 
+    [[nodiscard]] auto ToWString() const -> std::u16string {
+#if WINDOWS_MODE
+        if (IsBadReadPtr(this, sizeof(String))) return {};
+        if (IsBadReadPtr(m_firstChar, m_stringLength)) return {};
+#endif
+        if (!this) return {};
+        try {
+            // using convert_typeX = std::codecvt_utf8<wchar_t>;
+            // std::wstring_convert<convert_typeX> converterX;
+            // return converterX.to_bytes(m_firstChar);
+            return {chars};
+        }
+        catch (std::exception& e) {
+            std::cout << "String Invoke Error\n";
+            GakumasLocal::Log::ErrorFmt("String Invoke Error: %s", e.what());
+            return {};
+        }
+    }
+
     auto operator=(const std::string& newString) const -> String* { return New(newString); }
 
     auto operator==(const std::wstring& newString) const -> bool { return Equals(newString); }