gkms-localify-ios/convert_il2cpp_json_to_bin.py

263 lines
7.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Convert il2cpp.json to a flat binary format (.bin) for fast loading.

Pre-processes all string parsing (parseGroup, parseDotNetSignature) so the
C++ Init function can load pre-computed data directly, avoiding JSON parsing
and regex-like string operations at app startup.

Binary format (all integers little-endian):

    Header (16 bytes):
        char[4]  magic = "ILCB"
        uint32   version = 1
        uint32   method_count
        uint32   total_param_count

    MethodEntry[method_count] (each 48 bytes):
        uint32   assembly_off, assembly_len      (into string pool)
        uint32   namespace_off, namespace_len
        uint32   classname_off, classname_len
        uint32   methodname_off, methodname_len
        uint32   param_count
        uint32   params_start_idx                (index into ParamRef array)
        uint64   rva

    ParamRef[total_param_count] (each 8 bytes):
        uint32   str_off, str_len                (into string pool)

    StringPool:
        raw UTF-8 bytes (no null terminators; lengths are explicit)

Usage:
    python convert_il2cpp_json_to_bin.py <il2cpp.json> [output.bin]
"""
import json
import struct
import sys
from pathlib import Path
def parse_group(group: str):
    """
    Split a `group` string into (assembly, namespace, class_name).

    The assembly is everything up to and including the first ".dll". The
    remainder (after one optional '/') is a '/'-separated path whose last
    segment is the class name; the preceding segments are joined with '.'
    to form the namespace (empty when there is only one segment).

    "Assembly-CSharp.dll/Campus/OutGame/SomePresenter"
    -> ("Assembly-CSharp.dll", "Campus.OutGame", "SomePresenter")

    Returns None when ".dll" is absent or nothing follows the assembly.
    """
    stem, marker, tail = group.partition(".dll")
    if not marker:
        return None

    # Drop the single separator between the assembly and the type path.
    if tail.startswith('/'):
        tail = tail[1:]
    if not tail:
        return None

    # Everything before the last '/' is the namespace path.
    namespace_path, _, class_name = tail.rpartition('/')
    return stem + marker, namespace_path.replace('/', '.'), class_name
def split_params(param_str: str):
    """
    Split a parameter-list string on top-level commas.

    Commas nested inside [], <> or () do not split. Each piece is stripped
    of surrounding whitespace and empty pieces are discarded. A blank or
    whitespace-only input yields an empty list.
    """
    if not param_str.strip():
        return []

    openers = '[<('
    closers = ']>)'
    pieces = []
    nesting = 0
    seg_start = 0  # index where the current piece begins

    for idx, ch in enumerate(param_str):
        if ch in openers:
            nesting += 1
        elif ch in closers:
            nesting -= 1
        elif ch == ',' and nesting == 0:
            piece = param_str[seg_start:idx].strip()
            if piece:
                pieces.append(piece)
            seg_start = idx + 1

    # Whatever follows the last top-level comma is the final piece.
    last_piece = param_str[seg_start:].strip()
    if last_piece:
        pieces.append(last_piece)
    return pieces
def parse_dot_net_signature(sig: str):
    """
    Extract (method_name, [param_types]) from a dotNetSignature string.

    "Void SetItemModels(IReadOnlyList`1[X])"
    -> ("SetItemModels", ["IReadOnlyList`1[X]"])

    Returns None when there is no '(' or the method name is empty. When no
    ')' follows the first '(', the parameter list is reported as empty.
    """
    open_idx = sig.find('(')
    if open_idx < 0:
        return None

    # The method name is the text after the last space preceding '('.
    method_name = sig[:open_idx].rsplit(' ', 1)[-1]
    if not method_name:
        return None

    # rfind() gives -1 when ')' is missing, which is also <= open_idx.
    close_idx = sig.rfind(')')
    if close_idx <= open_idx:
        return method_name, []

    inner = sig[open_idx + 1:close_idx].strip()
    return method_name, split_params(inner)
def parse_hex_address(hex_str: str) -> int:
    """
    Interpret `hex_str` as a base-16 integer.

    Returns 0 instead of raising when the value cannot be parsed
    (malformed text or a non-string argument).
    """
    try:
        address = int(hex_str, 16)
    except (ValueError, TypeError):
        # Best-effort: unparsable addresses degrade to 0.
        address = 0
    return address
class StringPool:
    """
    Append-only pool of UTF-8 encoded strings with de-duplication.

    Each distinct string is stored once; adding it again returns the
    (offset, length) pair recorded the first time.
    """

    def __init__(self):
        # Maps each string already stored to its (byte offset, byte length).
        self._cache: dict[str, tuple[int, int]] = {}
        self._pool = bytearray()

    def add(self, s: str) -> tuple[int, int]:
        """Store `s` if it is new and return its (offset, length) in bytes."""
        ref = self._cache.get(s)
        if ref is None:
            raw = s.encode('utf-8')
            ref = (len(self._pool), len(raw))
            self._pool += raw
            self._cache[s] = ref
        return ref

    def data(self) -> bytes:
        """Return an immutable copy of the pooled bytes."""
        return bytes(self._pool)
# struct layouts of the three fixed-size record kinds.
# '<' selects little-endian byte order with standard sizes and no padding.
HEADER_FMT = '<4sIII'  # magic(4) + version + method_count + total_param_count
METHOD_FMT = '<' + 'I' * 10 + 'Q'  # 10 x uint32 + 1 x uint64 = 48 bytes
PARAM_FMT = '<' + 'I' * 2  # 2 x uint32 = 8 bytes

# Record sizes in bytes: 16, 48 and 8 respectively.
HEADER_SIZE, METHOD_SIZE, PARAM_SIZE = (
    struct.calcsize(fmt) for fmt in (HEADER_FMT, METHOD_FMT, PARAM_FMT)
)
def convert(input_path: str, output_path: str):
    """
    Read the il2cpp JSON at `input_path` and write the flat binary to
    `output_path`.

    Every item of root["addressMap"]["methodDefinitions"] is parsed into
    (assembly, namespace, class, method, parameter types, address). Items
    missing `group`, `dotNetSignature` or `virtualAddress`, or whose group
    or signature cannot be parsed, are skipped and counted. An address that
    fails to parse is stored as 0 by parse_hex_address.

    The output is written as header, method table, parameter-ref table and
    string pool, in that order; a short summary is printed afterwards.
    """
    with open(input_path, 'r', encoding='utf-8') as src:
        document = json.load(src)
    definitions = document.get("addressMap", {}).get("methodDefinitions", [])

    strings = StringPool()
    method_rows = []  # one 11-int tuple per method, in METHOD_FMT field order
    param_rows = []   # (offset, length) refs of all methods, back to back
    skipped = 0

    for item in definitions:
        group = item.get("group")
        signature = item.get("dotNetSignature")
        address_text = item.get("virtualAddress")
        if not (group and signature and address_text):
            skipped += 1
            continue

        location = parse_group(group)
        if not location:
            skipped += 1
            continue

        name_and_params = parse_dot_net_signature(signature)
        if not name_and_params:
            skipped += 1
            continue
        method_name, param_types = name_and_params

        rva = parse_hex_address(address_text)

        # Pool insertion order (assembly, namespace, class, method, params)
        # determines the offsets, so it must stay fixed.
        name_fields = []
        for text in (*location, method_name):
            name_fields.extend(strings.add(text))
        type_refs = [strings.add(param_type) for param_type in param_types]

        # This method's refs start at the current end of the shared table.
        method_rows.append(
            (*name_fields, len(param_types), len(param_rows), rva)
        )
        param_rows.extend(type_refs)

    # --- build binary ---
    total_params = len(param_rows)
    header = struct.pack(HEADER_FMT, b'ILCB', 1, len(method_rows), total_params)
    method_buf = b''.join(struct.pack(METHOD_FMT, *row) for row in method_rows)
    param_buf = b''.join(
        struct.pack(PARAM_FMT, offset, length) for offset, length in param_rows
    )
    string_data = strings.data()

    with open(output_path, 'wb') as dst:
        for section in (header, method_buf, param_buf, string_data):
            dst.write(section)

    total_size = HEADER_SIZE + len(method_buf) + len(param_buf) + len(string_data)
    print(f"Done: {len(method_rows)} methods ({skipped} skipped) -> {output_path}")
    print(f" String pool : {len(string_data):,} bytes")
    print(f" Total params: {total_params:,}")
    print(f" File size : {total_size:,} bytes")
def main():
    """
    Command-line entry point.

    Usage:
        python convert_il2cpp_json_to_bin.py <il2cpp.json> [output.bin]

    The input path is taken from the first command-line argument; when no
    argument is given the user is prompted for it instead. The output path
    is the optional second argument and defaults to the input path with its
    suffix replaced by ".bin".

    Exits with a usage message when more than two arguments are supplied or
    the input path is empty.
    """
    usage = "Usage: python convert_il2cpp_json_to_bin.py <il2cpp.json> [output.bin]"

    args = sys.argv[1:]
    if len(args) > 2:
        sys.exit(usage)

    if args:
        input_file = args[0]
    else:
        # Interactive fallback keeps the original prompt-driven workflow.
        input_file = input("Enter the path to the il2cpp.json file: ")

    # Path('').with_suffix() raises ValueError, so reject an empty path up front.
    if not input_file:
        sys.exit(usage)

    if len(args) == 2:
        output_file = args[1]
    else:
        output_file = str(Path(input_file).with_suffix('.bin'))

    convert(input_file, output_file)


if __name__ == '__main__':
    main()