#!/usr/bin/env python3
"""
Convert il2cpp.json to a flat binary format (.bin) for fast loading.

Pre-processes all string parsing (parseGroup, parseDotNetSignature) so the
C++ Init function can load pre-computed data directly, avoiding JSON parsing
and regex-like string operations at app startup.

Binary format (all integers little-endian):

  Header (16 bytes):
    char[4]  magic = "ILCB"
    uint32   version = 1
    uint32   method_count
    uint32   total_param_count

  MethodEntry[method_count] (each 48 bytes):
    uint32   assembly_off, assembly_len      (into string pool)
    uint32   namespace_off, namespace_len
    uint32   classname_off, classname_len
    uint32   methodname_off, methodname_len
    uint32   param_count
    uint32   params_start_idx                (index into ParamRef array)
    uint64   rva

  ParamRef[total_param_count] (each 8 bytes):
    uint32   str_off, str_len                (into string pool)

  StringPool:
    raw UTF-8 bytes (no null terminators; lengths are explicit)

Usage:
    python convert_il2cpp_json_to_bin.py <il2cpp.json> [output.bin]
"""
|
||
|
||
import json
|
||
import struct
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
|
||
def parse_group(group: str):
    """
    Split a `group` path into its assembly, namespace and class name.

    The assembly is everything up to and including the first ".dll". The
    remaining '/'-separated segments form the namespace (joined with '.')
    except for the last one, which is the class name.

        "Assembly-CSharp.dll/Campus/OutGame/SomePresenter"
            -> ("Assembly-CSharp.dll", "Campus.OutGame", "SomePresenter")

    Returns None when `group` contains no ".dll" or when nothing follows the
    assembly (optionally after a single '/' separator).
    """
    marker = ".dll"
    marker_pos = group.find(marker)
    if marker_pos < 0:
        return None

    assembly_end = marker_pos + len(marker)
    assembly = group[:assembly_end]

    # Drop at most one separator between the assembly and the type path.
    type_path = group[assembly_end:]
    if type_path.startswith('/'):
        type_path = type_path[1:]
    if not type_path:
        return None

    # split() always yields at least one segment, so the unpacking is safe.
    *namespace_parts, class_name = type_path.split('/')
    return assembly, '.'.join(namespace_parts), class_name
|
||
|
||
|
||
def split_params(param_str: str):
    """
    Split a parameter list on top-level commas only.

    Commas nested inside any of '[]', '<>' or '()' do not split. Each piece
    is whitespace-stripped and empty pieces are discarded. A blank input
    yields an empty list.
    """
    if not param_str.strip():
        return []

    openers = '[<('
    closers = ']>)'

    pieces = []
    nesting = 0
    piece_start = 0
    for pos, ch in enumerate(param_str):
        if ch in openers:
            nesting += 1
        elif ch in closers:
            nesting -= 1
        elif ch == ',' and nesting == 0:
            # Top-level separator: close the current piece, skip the comma.
            pieces.append(param_str[piece_start:pos])
            piece_start = pos + 1
    pieces.append(param_str[piece_start:])

    return [p for p in (piece.strip() for piece in pieces) if p]
|
||
|
||
|
||
def parse_dot_net_signature(sig: str):
    """
    Extract the method name and parameter type list from a dotNetSignature.

        "Void SetItemModels(IReadOnlyList`1[X])"
            -> ("SetItemModels", ["IReadOnlyList`1[X]"])

    The method name is the text between the last space before the first '('
    and that '('. Returns None when there is no '(' or the name is empty.
    When no ')' follows the '(', the parameter list is returned empty.
    """
    open_idx = sig.find('(')
    if open_idx < 0:
        return None

    # Everything after the last space of the prefix (or the whole prefix).
    method_name = sig[:open_idx].rsplit(' ', 1)[-1]
    if not method_name:
        return None

    # rfind() gives -1 when ')' is absent, which is also <= open_idx.
    close_idx = sig.rfind(')')
    if close_idx <= open_idx:
        return method_name, []

    inner = sig[open_idx + 1:close_idx].strip()
    return method_name, split_params(inner)
|
||
|
||
|
||
def parse_hex_address(hex_str: str) -> int:
    """
    Parse a base-16 string (with or without a "0x" prefix) into an int.

    Returns 0 when the value cannot be parsed as hexadecimal or is not a
    string at all.
    """
    try:
        address = int(hex_str, 16)
    except (ValueError, TypeError):
        address = 0
    return address
|
||
|
||
|
||
class StringPool:
    """
    Append-only pool of UTF-8 encoded strings with de-duplication.

    Each distinct string is stored once; adding it again returns the same
    (offset, length) pair. Offsets and lengths are measured in bytes.
    """

    def __init__(self):
        # Concatenated UTF-8 bytes of every distinct string added so far.
        self._pool = bytearray()
        # Maps each string to the (byte offset, byte length) it occupies.
        self._cache: dict[str, tuple[int, int]] = {}

    def add(self, s: str) -> tuple[int, int]:
        """Return the (offset, length) of `s`, appending it if it is new."""
        span = self._cache.get(s)
        if span is None:
            raw = s.encode('utf-8')
            span = (len(self._pool), len(raw))
            self._pool += raw
            self._cache[s] = span
        return span

    def data(self) -> bytes:
        """Return an immutable copy of the pooled bytes."""
        return bytes(self._pool)
|
||
|
||
|
||
# struct layouts of the three fixed-size record kinds. The leading '<' selects
# little-endian byte order with no alignment padding, so each size is exactly
# the sum of its fields.

# Header: magic(4) + version + method_count + total_param_count
HEADER_FMT = '<4sIII'
HEADER_SIZE = struct.calcsize(HEADER_FMT)  # 16

# MethodEntry: 10 x uint32 + 1 x uint64
METHOD_FMT = '<IIIIIIIIIIQ'
METHOD_SIZE = struct.calcsize(METHOD_FMT)  # 48

# ParamRef: 2 x uint32
PARAM_FMT = '<II'
PARAM_SIZE = struct.calcsize(PARAM_FMT)  # 8
|
||
|
||
|
||
# Largest value representable by the uint64 `rva` field of a MethodEntry.
_UINT64_MAX = 0xFFFFFFFFFFFFFFFF


def _collect_methods(defs, pool):
    """
    Parse raw method definitions into records ready for struct packing.

    Args:
        defs: iterable of decoded JSON entries; each usable entry is a dict
            with "group", "dotNetSignature" and "virtualAddress" values.
        pool: StringPool that receives every string referenced by a record.

    Returns:
        (records, param_refs, skipped) where
        - records is a list of 11-tuples in METHOD_FMT field order,
        - param_refs is the flat list of (offset, length) pairs of all
          parameter type strings, in record order,
        - skipped is the number of entries that could not be converted.
    """
    records = []
    param_refs = []
    skipped = 0

    for entry in defs:
        # A malformed entry must not abort the whole conversion.
        if not isinstance(entry, dict):
            skipped += 1
            continue

        group = entry.get("group")
        dot_net_sig = entry.get("dotNetSignature")
        va_str = entry.get("virtualAddress")
        if not group or not dot_net_sig or not va_str:
            skipped += 1
            continue

        parsed_group = parse_group(group)
        if not parsed_group:
            skipped += 1
            continue
        assembly, namespace, class_name = parsed_group

        parsed_sig = parse_dot_net_signature(dot_net_sig)
        if not parsed_sig:
            skipped += 1
            continue
        method_name, param_types = parsed_sig

        # parse_hex_address() yields 0 for unparsable text; such entries are
        # still emitted. Values that cannot fit the uint64 field (e.g. a
        # negative hex string) would make struct packing fail, so skip them
        # before anything is added to the pool.
        rva = parse_hex_address(va_str)
        if not 0 <= rva <= _UINT64_MAX:
            skipped += 1
            continue

        # Pool insertion order (assembly, namespace, class, method, params)
        # determines the byte layout of the string pool.
        asm_ref = pool.add(assembly)
        ns_ref = pool.add(namespace)
        cls_ref = pool.add(class_name)
        meth_ref = pool.add(method_name)

        params_start_idx = len(param_refs)
        param_refs.extend(pool.add(pt) for pt in param_types)

        records.append((
            *asm_ref,
            *ns_ref,
            *cls_ref,
            *meth_ref,
            len(param_types),
            params_start_idx,
            rva,
        ))

    return records, param_refs, skipped


def convert(input_path: str, output_path: str):
    """
    Convert an il2cpp.json file into the flat binary format.

    Reads `addressMap.methodDefinitions` from the JSON document at
    `input_path`, pre-parses every entry, and writes header, method table,
    parameter table and string pool (in that order) to `output_path`.
    Entries that cannot be parsed are skipped and counted; a summary is
    printed to stdout.

    Args:
        input_path: path of the UTF-8 encoded il2cpp.json file.
        output_path: path of the .bin file to create or overwrite.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if the input is not valid JSON.
        struct.error: if a count or string-pool offset exceeds uint32.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        root = json.load(f)

    # `or` also covers keys that are present but explicitly null.
    address_map = root.get("addressMap") or {}
    defs = address_map.get("methodDefinitions") or []

    pool = StringPool()
    methods, param_refs, error_count = _collect_methods(defs, pool)
    total_params = len(param_refs)

    # --- build binary ---
    header = struct.pack(HEADER_FMT, b'ILCB', 1, len(methods), total_params)

    # Pre-compiled Struct objects avoid re-parsing the format per record.
    pack_method = struct.Struct(METHOD_FMT).pack
    pack_param = struct.Struct(PARAM_FMT).pack
    method_buf = b''.join(pack_method(*record) for record in methods)
    param_buf = b''.join(pack_param(off, length) for off, length in param_refs)

    string_data = pool.data()

    with open(output_path, 'wb') as f:
        f.write(header)
        f.write(method_buf)
        f.write(param_buf)
        f.write(string_data)

    total_size = HEADER_SIZE + len(method_buf) + len(param_buf) + len(string_data)
    print(f"Done: {len(methods)} methods ({error_count} skipped) -> {output_path}")
    print(f" String pool : {len(string_data):,} bytes")
    print(f" Total params: {total_params:,}")
    print(f" File size : {total_size:,} bytes")
|
||
|
||
|
||
def main():
    """
    Command-line entry point.

    Usage: python convert_il2cpp_json_to_bin.py <il2cpp.json> [output.bin]

    With no arguments the input path is requested interactively. When no
    output path is given, the input path with its suffix replaced by ".bin"
    is used.
    """
    args = sys.argv[1:]
    if len(args) > 2:
        sys.exit(f"Usage: python {Path(sys.argv[0]).name} <il2cpp.json> [output.bin]")

    if args:
        input_file = args[0]
    else:
        # Pasted or drag-and-dropped paths often carry stray whitespace or
        # surrounding quotes; remove them before using the path.
        raw = input("Enter the path to the il2cpp.json file: ")
        input_file = raw.strip().strip('"\'')

    if not input_file:
        sys.exit("No input file given.")

    if len(args) > 1:
        output_file = args[1]
    else:
        output_file = str(Path(input_file).with_suffix('.bin'))

    convert(input_file, output_file)


if __name__ == '__main__':
    main()
|