Skip to content

Commit

Permalink
Merge pull request #3303 from Ghabry/string-var-utf8
Browse files Browse the repository at this point in the history
String Variables: Operate on Codepoints
  • Loading branch information
fdelapena authored Dec 5, 2024
2 parents edec538 + 713db9d commit 6a45364
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 59 deletions.
43 changes: 21 additions & 22 deletions src/game_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4858,7 +4858,7 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
int pos = 0;
std::string op_string;
for (int i = 0; i < 3; i++) {
op_string += ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[i], args[i], &pos, *Main_Data::game_variables));
op_string += Main_Data::game_strings->GetWithModeAndPos(str_param, modes[i], args[i], pos, *Main_Data::game_variables);
}
result = std::move(op_string);
break;
Expand All @@ -4869,34 +4869,30 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
std::string base, insert;

args[1] = ValueOrVariable(modes[1], args[1]);
base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
insert = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
insert = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);

result = base.insert(args[1], insert);
result = Game_Strings::Insert(base, insert, args[1]);
break;
}
case 8: //Replace (rep) <fn(string base, string search, string replacement)>
{
int pos = 0;
std::string base, search, replacement;

base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
search = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], &pos, *Main_Data::game_variables));
replacement = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
search = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], pos, *Main_Data::game_variables);
replacement = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);
result = Utils::ReplaceAll(base, search, replacement);

std::size_t index = base.find(search);
while (index != std::string::npos) {
base.replace(index, search.length(), replacement);
index = base.find(search, index + replacement.length());
}

result = std::move(base);
break;
}
case 9: //Substring (subs) <fn(string base, int index, int size)>
args[1] = ValueOrVariable(modes[1], args[1]);
args[2] = ValueOrVariable(modes[2], args[2]);
result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables).substr(args[1], args[2]));

result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables));
result = Game_Strings::Substring(result, args[1], args[2]);
break;
case 10: //Join (join) <fn(string delimiter, int id, int size)>
{
Expand Down Expand Up @@ -4941,21 +4937,24 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
args[1] = ValueOrVariable(modes[1], args[1]);
args[2] = ValueOrVariable(modes[2], args[2]);
result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables));
result = result.erase(args[1], args[2]);
result = Game_Strings::Erase(result, args[1], args[2]);
break;
case 14: //Replace Ex (exRep) <fn(string base, string search, string replacement, bool first)>, edge case: the arg "first" is at ((flags >> 19) & 1). Wtf BingShan
{
int pos = 0;
std::string base, search, replacement;

base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
search = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], &pos, *Main_Data::game_variables));
replacement = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
search = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], pos, *Main_Data::game_variables);
replacement = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);

auto flags = std::regex_constants::match_default;

std::regex rexp(search);
if (first_flag) {
flags = std::regex_constants::format_first_only;
}

if (first_flag) result = std::regex_replace(base, rexp, replacement, std::regex_constants::format_first_only);
else result = std::regex_replace(base, rexp, replacement);
result = Game_Strings::RegExReplace(base, search, replacement, flags);
break;
}
default:
Expand Down
4 changes: 2 additions & 2 deletions src/game_interpreter_shared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ StringView Game_Interpreter_Shared::CommandStringOrVariable(lcf::rpg::EventComma
assert(mode_idx != val_idx);

if (static_cast<int>(com.parameters.size()) > std::max(mode_idx, val_idx)) {
return game_strings->GetWithMode(ToString(com.string), com.parameters[mode_idx], com.parameters[val_idx], *game_variables);
return game_strings->GetWithMode(com.string, com.parameters[mode_idx], com.parameters[val_idx], *game_variables);
}

return com.string;
Expand All @@ -181,7 +181,7 @@ StringView Game_Interpreter_Shared::CommandStringOrVariableBitfield(lcf::rpg::Ev

if (static_cast<int>(com.parameters.size()) >= std::max(mode_idx, val_idx) + 1) {
int mode = com.parameters[mode_idx];
return game_strings->GetWithMode(ToString(com.string), (mode & (0xF << shift * 4)) >> shift * 4, com.parameters[val_idx], *game_variables);
return game_strings->GetWithMode(com.string, (mode & (0xF << shift * 4)) >> shift * 4, com.parameters[val_idx], *game_variables);
}

return com.string;
Expand Down
143 changes: 128 additions & 15 deletions src/game_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,22 @@ int Game_Strings::ToNum(Str_Params params, int var_id, Game_Variables& variables
num = static_cast<int>(std::strtol(it->second.c_str(), nullptr, 0));

variables.Set(var_id, num);
Game_Map::SetNeedRefresh(true);

Game_Map::SetNeedRefreshForVarChange(var_id);

return num;
}

int Game_Strings::GetLen(Str_Params params, int var_id, Game_Variables& variables) const {
// Note: The length differs between Maniac and EasyRPG due to different internal encoding (utf-8 vs. ansi)

if (params.string_id <= 0) {
return -1;
}

int len = Get(params.string_id).length();
int len = Utils::UTF8Length(Get(params.string_id));
variables.Set(var_id, len);
Game_Map::SetNeedRefresh(true);

Game_Map::SetNeedRefreshForVarChange(var_id);

return len;
}

Expand All @@ -118,9 +120,14 @@ int Game_Strings::InStr(Str_Params params, std::string search, int var_id, int b
search = Extract(search, params.hex);
}

int index = Get(params.string_id).find(search, begin);
auto search32 = Utils::DecodeUTF32(search);
auto string32 = Utils::DecodeUTF32(Get(params.string_id));

int index = string32.find(search32, begin);
variables.Set(var_id, index);
Game_Map::SetNeedRefresh(true);

Game_Map::SetNeedRefreshForVarChange(var_id);

return index;
}

Expand Down Expand Up @@ -161,6 +168,7 @@ int Game_Strings::Split(Str_Params params, const std::string& delimiter, int str
if (str.find(delimiter) == std::string::npos) {
// token not found
} else {
// This works for UTF-8
std::string token;
for (auto index = str.find(delimiter); index != std::string::npos; index = str.find(delimiter)) {
token = str.substr(0, index);
Expand All @@ -175,6 +183,9 @@ int Game_Strings::Split(Str_Params params, const std::string& delimiter, int str
// set the remaining string
Set(params, str);
variables.Set(var_id, components);

Game_Map::SetNeedRefreshForVarChange(var_id);

return components;
}

Expand Down Expand Up @@ -277,24 +288,31 @@ StringView Game_Strings::PopLine(Str_Params params, int offset, int string_out_i
}

StringView Game_Strings::ExMatch(Str_Params params, std::string expr, int var_id, int begin, int string_out_id, Game_Variables& variables) {
// std::regex only works with char and wchar, not char32
// For full Unicode support requires the w-API, even on non-Windows systems
int var_result;
std::string str_result;
std::smatch match;

if (params.extract) {
expr = Extract(expr, params.hex);
}

std::string base = ToString(Get(params.string_id)).erase(0, begin);
std::regex r(expr);
auto source = Get(params.string_id);
std::string base = Substring(source, begin, Utils::UTF8Length(source));

std::regex_search(base, match, r);
std::wsmatch match;
auto wbase = Utils::ToWideString(base);
auto wexpr = Utils::ToWideString(expr);

std::wregex r(wexpr);

std::regex_search(wbase, match, r);
str_result = Utils::FromWideString(match.str());

var_result = match.position() + begin;
variables.Set(var_id, var_result);
Game_Map::SetNeedRefresh(true);
Game_Map::SetNeedRefreshForVarChange(var_id);

str_result = match.str();
if (string_out_id > 0) {
params.string_id = string_out_id;
Set(params, str_result);
Expand Down Expand Up @@ -337,8 +355,18 @@ const Game_Strings::Strings_t& Game_Strings::RangeOp(Str_Params params, int stri
}

std::string Game_Strings::PrependMin(StringView string, int min_size, char c) {
if (static_cast<int>(string.size()) < min_size) {
int s = min_size - string.size();
int len = Utils::UTF8Length(string);

if (min_size < 0) {
// Left adjust
min_size = abs(min_size);
if (len < min_size) {
int s = min_size - len;
return ToString(string) + std::string(s, c);
}
} else if (len < min_size) {
// Right adjust
int s = min_size - len;
return std::string(s, c) + ToString(string);
}
return ToString(string);
Expand All @@ -356,6 +384,91 @@ std::string Game_Strings::Extract(StringView string, bool as_hex) {
return PendingMessage::ApplyTextInsertingCommands(ToString(string), Player::escape_char, cmd_fn);
}

/**
 * Extracts a substring measured in codepoints instead of bytes.
 *
 * @param source UTF-8 encoded string to extract from
 * @param begin index of the first codepoint to extract. Negative values are
 *  resolved through AdjustIndex.
 * @param length number of codepoints to extract. Negative values are treated as 0.
 * @return the extracted substring. Empty when no start position could be determined.
 */
std::string Game_Strings::Substring(StringView source, int begin, int length) {
	const char* iter = source.data();
	const auto end = source.data() + source.size();

	begin = AdjustIndex(source, begin);

	if (length < 0) {
		length = 0;
	}

	// Points at start of the substring
	auto left = Utils::UTF8Skip(iter, end, begin);

	if (left.next == nullptr) {
		// UTF8Skip reported no position (Erase guards against the same case).
		// Without a start there is nothing to extract, and the null pointer
		// must neither be skipped from nor used as a string iterator.
		return {};
	}

	// Points at end of the substring
	auto right = Utils::UTF8Skip(left.next, end, length);

	if (right.next == nullptr) {
		// No end position reported: take everything up to the end of source
		return std::string(left.next, end);
	}

	return std::string(left.next, right.next);
}

/**
 * Inserts a string into another one at a position measured in codepoints.
 *
 * @param source UTF-8 encoded string to insert into
 * @param what string to insert
 * @param where codepoint index to insert at. Negative values are resolved
 *  through AdjustIndex.
 * @return copy of source with what inserted at the requested position
 */
std::string Game_Strings::Insert(StringView source, StringView what, int where) {
	const char* iter = source.data();
	const auto end = source.data() + source.size();

	where = AdjustIndex(source, where);

	// Points at insertion location
	auto ret = Utils::UTF8Skip(iter, end, where);

	// UTF8Skip can report no position (Erase and Substring check for nullptr).
	// Building a string from a null boundary is invalid, so such a position
	// is treated as the end of source, which turns the insert into an append.
	const char* split = (ret.next != nullptr) ? ret.next : end;

	return std::string(source.data(), split) + ToString(what) + std::string(split, end);
}

/**
 * Removes a range of codepoints from a string.
 *
 * @param source UTF-8 encoded string to erase from
 * @param begin index of the first codepoint to remove. Negative values are
 *  resolved through AdjustIndex.
 * @param length number of codepoints to remove. Negative values are treated as 0.
 * @return copy of source without the requested range
 */
std::string Game_Strings::Erase(StringView source, int begin, int length) {
	const char* const first = source.data();
	const char* const last = first + source.size();

	const int start_index = AdjustIndex(source, begin);
	const int count = (length < 0) ? 0 : length;

	// Start of the range to drop
	const auto cut_begin = Utils::UTF8Skip(first, last, start_index);
	if (cut_begin.next == nullptr) {
		// No start position reported: leave the string untouched
		return ToString(source);
	}

	// End of the range to drop
	const auto cut_end = Utils::UTF8Skip(cut_begin.next, last, count);

	// Keep everything in front of the range ...
	std::string kept(first, cut_begin.next);
	// ... and, when an end position was reported, everything behind it
	if (cut_end.next != nullptr) {
		kept += std::string(cut_end.next, last);
	}

	return kept;
}

/**
 * Performs a regular expression replacement on a UTF-8 string.
 *
 * @param str string to search in
 * @param search regular expression to search for
 * @param replace replacement format string
 * @param flags flags forwarded to std::regex_replace
 * @return the string with the replacements applied
 */
std::string Game_Strings::RegExReplace(StringView str, StringView search, StringView replace, std::regex_constants::match_flag_type flags) {
	// std::regex is only provided for char and wchar_t, not for char32_t.
	// Unicode-aware matching therefore goes through the wide string API,
	// even on non-Windows systems.
	const auto wide_subject = Utils::ToWideString(str);
	const auto wide_pattern = Utils::ToWideString(search);
	const auto wide_format = Utils::ToWideString(replace);

	// NOTE: the std::wregex constructor throws std::regex_error when the
	// pattern is malformed; this is not handled here.
	const std::wregex pattern(wide_pattern);

	return Utils::FromWideString(std::regex_replace(wide_subject, pattern, wide_format, flags));
}

/**
 * Converts a possibly negative codepoint index into a non-negative one.
 *
 * @param str string the index refers to
 * @param index codepoint index. Non-negative values are returned unchanged.
 *  Negative values count backwards from the end of str.
 * @return the index, or for negative input the codepoint length of str
 *  plus index, clamped to 0
 */
int Game_Strings::AdjustIndex(StringView str, int index) {
	if (index >= 0) {
		return index;
	}

	// index is negative here, so adding it is the same as subtracting its
	// magnitude. Unlike abs(index) this stays well defined for INT_MIN,
	// whose absolute value is not representable as an int.
	return std::max(Utils::UTF8Length(str) + index, 0);
}

std::optional<std::string> Game_Strings::ManiacsCommandInserter(char ch, const char** iter, const char* end, uint32_t escape_char) {
if (ch == 'S' || ch == 's') {
// \s in a normal message is the speed modifier
Expand Down
38 changes: 29 additions & 9 deletions src/game_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,23 @@
#include "pending_message.h"
#include "player.h"
#include "string_view.h"
#include "utils.h"

#include <regex>

#ifdef HAVE_NLOHMANN_JSON
#include <nlohmann/json.hpp>
#endif

/**
* Game_Strings class.
* All operations work on codepoints (instead of bytes).
* This way operations that use a length or an index behave almost like in Maniac Patch.
* Codepoints are also the better unit because a codepoint represents a single character,
* whereas byte-based indexing risks "chopping" a character in half.
*
* Where it is simple to implement, UTF-8 is used directly.
* In other cases the code does a roundtrip through UTF-32.
*/
class Game_Strings {
public:
Expand Down Expand Up @@ -62,7 +72,7 @@ class Game_Strings {
StringView Get(int id) const;
StringView GetIndirect(int id, const Game_Variables& variables) const;
StringView GetWithMode(StringView str_data, int mode, int arg, const Game_Variables& variables) const;
StringView GetWithModeAndPos(StringView str_data, int mode, int arg, int* pos, const Game_Variables& variables);
std::string GetWithModeAndPos(StringView str_data, int mode, int arg, int& pos, const Game_Variables& variables);

#ifdef HAVE_NLOHMANN_JSON
nlohmann::json* ParseJson(int id);
Expand All @@ -83,6 +93,11 @@ class Game_Strings {

static std::string PrependMin(StringView string, int min_size, char c);
static std::string Extract(StringView string, bool as_hex);
static std::string Substring(StringView source, int begin, int length);
static std::string Insert(StringView source, StringView what, int where);
static std::string Erase(StringView source, int begin, int length);
static std::string RegExReplace(StringView str, StringView search, StringView replace, std::regex_constants::match_flag_type flags = std::regex_constants::match_default);
static int AdjustIndex(StringView str, int index);

static std::optional<std::string> ManiacsCommandInserter(char ch, const char** iter, const char* end, uint32_t escape_char);
static std::optional<std::string> ManiacsCommandInserterHex(char ch, const char** iter, const char* end, uint32_t escape_char);
Expand Down Expand Up @@ -197,18 +212,23 @@ inline StringView Game_Strings::GetWithMode(StringView str_data, int mode, int a
}
}

inline StringView Game_Strings::GetWithModeAndPos(StringView str_data, int mode, int arg, int* pos, const Game_Variables& variables) {
StringView ret;
inline std::string Game_Strings::GetWithModeAndPos(StringView str_data, int mode, int arg, int& pos, const Game_Variables& variables) {
std::string ret;
switch (mode) {
case StringEvalMode::eStringEval_Text:
assert(pos);
ret = str_data.substr(*pos, arg);
*pos += arg;
case StringEvalMode::eStringEval_Text: {
const auto end = str_data.data() + str_data.size();

auto left = Utils::UTF8Skip(str_data.begin(), end, pos);
auto right = Utils::UTF8Skip(left.next, end, arg);

ret = std::string(left.next, right.next);
pos += arg;
return ret;
}
case StringEvalMode::eStringEval_Direct:
return Get(arg);
return ToString(Get(arg));
case StringEvalMode::eStringEval_Indirect:
return GetIndirect(arg, variables);
return ToString(GetIndirect(arg, variables));
default:
return ret;
}
Expand Down
Loading

0 comments on commit 6a45364

Please sign in to comment.