Files
d2warehouse/d2warehouse/parser.py

280 lines
7.8 KiB
Python

import struct
from bitarray import bitarray
from bitarray.util import ba2int
from d2warehouse.stash import Stash, StashTab
from d2warehouse.item import Affix, Item, LowQualityType, Quality
import d2warehouse.huffman as huffman
# Byte sequence that parse_stash_tab() requires at the start of every stash tab.
STASH_TAB_MAGIC = b"\x55\xAA\x55\xAA"
# Byte sequence that parse_items() requires at the start of a tab's item data.
ITEM_DATA_MAGIC = b"JM"
class ParseError(RuntimeError):
    """Raised when the input bytes do not match what the parser expects."""
class UnsupportedItemError(ParseError):
    """Raised for items that the parser recognizes but does not handle."""
def ensure_length(data, n):
    """Verify that ``data`` holds at least ``n`` bytes.

    Returns ``None`` when enough bytes are available.

    Raises:
        ParseError: if ``data`` is shorter than ``n``; the message includes a
            hex dump of the first (up to) 10 bytes to help locate the problem.
    """
    if len(data) >= n:
        return
    nearby = data[:10].hex()
    raise ParseError(
        f"Expected {n} bytes but only {len(data)} bytes are available near {nearby}"
    )
def parse_fixed(data: bytes, prefix: bytes) -> bytes:
    """Consume the literal ``prefix`` from the start of ``data``.

    Returns:
        The bytes of ``data`` that follow ``prefix``.

    Raises:
        ParseError: if ``data`` does not start with ``prefix``.
    """
    if not data.startswith(prefix):
        raise ParseError(f"Expected {prefix} near {data[:10].hex()}")
    consumed = len(prefix)
    return data[consumed:]
def parse_bytes(data: bytes, n: int) -> tuple[bytes, bytes]:
    """Split the first ``n`` bytes off ``data``.

    Returns:
        A ``(remaining, taken)`` tuple: the bytes after the first ``n``, and
        the first ``n`` bytes themselves.

    Raises:
        ParseError: (via ``ensure_length``) if fewer than ``n`` bytes exist.
    """
    ensure_length(data, n)
    taken, remaining = data[:n], data[n:]
    return remaining, taken
def parse_u32(data: bytes) -> tuple[bytes, int]:
    """Read a little-endian unsigned 32-bit integer from the start of ``data``.

    Returns:
        A ``(remaining, value)`` tuple.

    Raises:
        ParseError: (via ``ensure_length``) if fewer than 4 bytes exist.
    """
    ensure_length(data, 4)
    (value,) = struct.unpack("<I", data[:4])
    return data[4:], value
def parse_u16(data: bytes) -> tuple[bytes, int]:
    """Read a little-endian unsigned 16-bit integer from the start of ``data``.

    Returns:
        A ``(remaining, value)`` tuple.

    Raises:
        ParseError: (via ``ensure_length``) if fewer than 2 bytes exist.
    """
    ensure_length(data, 2)
    (value,) = struct.unpack("<H", data[:2])
    return data[2:], value
def parse_stash(data) -> Stash:
    """Parse a whole stash: a back-to-back sequence of stash tabs.

    Tabs are parsed with ``parse_stash_tab`` until no input is left, and are
    appended to ``Stash.tabs`` in the order they appear.
    """
    result = Stash()
    remaining = data
    while len(remaining) > 0:
        remaining, parsed_tab = parse_stash_tab(remaining)
        result.tabs.append(parsed_tab)
    return result
def parse_stash_tab(data: bytes) -> tuple[bytes, StashTab]:
    """Parse a single stash tab from the start of ``data``.

    Layout, in the order it is consumed below:

    * 4 bytes: ``STASH_TAB_MAGIC``
    * u32: unknown field, required to be 1
    * u32: version, required to be 99
    * u32: gold
    * u32: total length of the tab in bytes (header included)
    * 44 bytes: zero padding
    * ``length - 64`` bytes: item data

    Returns:
        A ``(remaining, tab)`` tuple, where ``remaining`` is whatever follows
        this tab in ``data``.

    Raises:
        ParseError: if the magic, unknown field, version, length or padding
            is not as expected, or if ``data`` is too short.
    """
    # 4 (magic) + 4 * 4 (unknown, version, gold, length) + 44 (padding)
    header_size = 64

    data = parse_fixed(data, STASH_TAB_MAGIC)
    data, unknown = parse_u32(data)
    data, version = parse_u32(data)
    # These checks must actually raise; merely constructing the exception
    # lets unsupported tabs slip through unnoticed.
    if unknown != 1:
        raise ParseError("Unknown stash tab field is not 1")
    if version != 99:
        raise ParseError(f"Unsupported stash tab version ({version} instead of 99)")

    tab = StashTab()
    data, gold = parse_u32(data)
    tab.gold = gold

    # Length is the total length of the tab data in bytes. This includes all
    # fields that are already parsed above.
    data, length = parse_u32(data)
    if length < header_size:
        # A smaller value would turn into a negative slice bound below.
        raise ParseError(
            f"Stash tab length {length} is smaller than the {header_size} byte header"
        )

    # Skip what is probably zero padding (?) but check just in case it contains data
    data, pad = parse_bytes(data, 44)
    if pad != b"\x00" * 44:
        raise ParseError(f"Unexpecteed data in zero padding: {pad}")

    # Separate out the item data so we can easily verify items parsed the correct
    # number of bytes
    data, item_data = parse_bytes(data, length - header_size)
    tab.item_data = item_data
    tab.items = parse_items(item_data)
    return data, tab
def parse_item(data: bytes) -> tuple[bytes, Item]:
    """Parse one item from the start of ``data``.

    The bytes are viewed as a little-endian bit stream. Flags and the grid
    position are read from fixed bit offsets, and the item kind is
    Huffman-decoded starting at bit 53. Simple items end after the socket
    bits. For other items the extended header, optional graphic, optional
    inherent mod, quality data, runeword id and personalization are parsed
    in that order.

    Returns:
        A ``(remaining, item)`` tuple. For simple items ``remaining`` is the
        input after the item's bytes. For extended items parsing is still
        incomplete, so ``remaining`` is always ``b""`` and ``item.raw_data``
        holds all of ``data``.

    Raises:
        UnsupportedItemError: if the item is an ear.
    """
    bits = bitarray(endian="little")
    bits.frombytes(data)

    # Flag bits at fixed positions.
    is_identified = bool(bits[4])
    is_socketed = bool(bits[11])
    is_ear = bool(bits[16])
    is_beginner = bool(bits[17])
    is_simple = bool(bits[21])
    is_ethereal = bool(bits[22])
    is_personalized = bool(bits[24])
    is_runeword = bool(bits[26])

    # Reject ears up front so they fail with the intended error rather than
    # with whatever decoding the remaining fields might do.
    if is_ear:
        raise UnsupportedItemError("Ear items are not supported")

    pos_x = ba2int(bits[42:46])
    pos_y = ba2int(bits[46:50])

    kind, kind_end = huffman.decode(bits[53:], 3)
    kind_end += 53  # make the offset absolute; decoding started at bit 53

    # TODO: verify that this socket thing is really 1 bit for simple items...?
    sockets_end = kind_end + 1 if is_simple else kind_end + 3
    sockets_count = ba2int(bits[kind_end:sockets_end])
    # NOTE(review): the print() calls in this function are debug output on
    # stdout; consider switching to logging once parsing is complete.
    print("sockets", sockets_count)

    # Round the bit offset up to whole bytes.
    simple_byte_sz = (sockets_end + 7) // 8
    print("simple size", simple_byte_sz)

    item = Item(
        data[:simple_byte_sz],
        is_identified,
        is_socketed,
        is_beginner,
        is_simple,
        is_ethereal,
        is_personalized,
        is_runeword,
        pos_x,
        pos_y,
        kind,
    )
    if is_simple:
        return data[simple_byte_sz:], item

    item.uid, item.lvl, item.quality = parse_extended_item(bits[sockets_end:])
    # 32 (uid) + 7 (lvl) + 4 (quality) bits, matching parse_extended_item().
    extended_end = sockets_end + 32 + 7 + 4

    # Each sub-parser reports a size relative to the slice it was given, so
    # the base offset is added back to keep every *_end value absolute.
    item.graphic, graphic_sz = parse_item_graphic(bits[extended_end:])
    graphic_end = extended_end + graphic_sz

    item.inherent, inherent_sz = parse_inherent_mod(bits[graphic_end:])
    inherent_end = graphic_end + inherent_sz
    print("in", inherent_end)

    item, quality_sz = parse_quality_data(bits[inherent_end:], item)
    quality_end = inherent_end + quality_sz

    if item.is_runeword:
        # parse_runeword() takes only the bit slice.
        item.runeword_id, runeword_sz = parse_runeword(bits[quality_end:])
        runeword_end = quality_end + runeword_sz
    else:
        runeword_end = quality_end

    if item.is_personalized:
        # parse_personalization() takes only the bit slice.
        item.personal_name, name_sz = parse_personalization(bits[runeword_end:])
        personalized_end = runeword_end + name_sz
    else:
        personalized_end = runeword_end

    extended_byte_size = (personalized_end + 7) // 8
    print("extended size", extended_byte_size)

    item.raw_data = data[:]
    return b"", item  # TODO: properly return remaining data
def parse_item_graphic(bits: bitarray) -> tuple[int | None, int]:
    """Read the optional graphic field.

    The first bit says whether a value follows. If it does, the value is the
    next 3 bits.

    Returns:
        ``(value, bits_consumed)``: ``(None, 1)`` when absent, otherwise the
        3-bit value and ``4``.
    """
    has_graphic = bits[0]
    if has_graphic:
        return ba2int(bits[1:4]), 4
    return None, 1
def parse_inherent_mod(bits: bitarray) -> tuple[int | None, int]:
    """Read the optional inherent-mod field.

    The first bit says whether a value follows. If it does, the value is the
    next 3 bits.

    Returns:
        ``(value, bits_consumed)``: ``(None, 1)`` when absent, otherwise the
        3-bit value and ``4``.
    """
    # NOTE(review): community descriptions of the save format list a wider
    # (11-bit) value for this field - confirm the width against real items.
    has_inherent = bits[0]
    if has_inherent:
        return ba2int(bits[1:4]), 4
    return None, 1
def parse_extended_item(bits: bitarray) -> tuple[int, int, Quality]:
    """Read the extended item header: uid, level and quality.

    The fields are 32, 7 and 4 bits wide respectively and start at bit 0 of
    ``bits``.

    Returns:
        ``(uid, lvl, quality)`` with quality converted to a ``Quality``.
    """
    uid_bits, lvl_bits, quality_bits = bits[:32], bits[32:39], bits[39:43]
    return ba2int(uid_bits), ba2int(lvl_bits), Quality(ba2int(quality_bits))
def parse_quality_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Parse the quality-specific section selected by ``item.quality``.

    Args:
        bits: Bit stream positioned at the start of the quality data.
        item: Item whose ``quality`` is already set; it is updated in place
            by the quality-specific parser.

    Returns:
        ``(item, bits_consumed)``. Normal-quality items carry no extra data,
        so they consume 0 bits.

    Raises:
        ParseError: if ``item.quality`` is not one of the handled qualities.
            Without this the function would return ``None`` and the caller
            would fail with an unrelated unpacking error.
    """
    quality = item.quality
    # Members are looked up on the Quality class throughout, rather than
    # through another member, so every branch reads the same way.
    if quality == Quality.LOW:
        return parse_low_quality_data(bits, item)
    if quality == Quality.NORMAL:
        return item, 0
    if quality == Quality.HIGH:
        return parse_high_quality_data(bits, item)
    if quality == Quality.MAGIC:
        return parse_magic_data(bits, item)
    if quality == Quality.SET:
        return parse_set_data(bits, item)
    if quality == Quality.UNIQUE:
        return parse_unique_data(bits, item)
    if quality == Quality.RARE or quality == Quality.CRAFTED:
        return parse_rare_data(bits, item)  # crafted & rare are the same
    raise ParseError(f"Unsupported item quality: {quality!r}")
def parse_low_quality_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Read the 3-bit low-quality type into ``item.low_quality``.

    Returns:
        ``(item, 3)``; 3 is the number of bits consumed.
    """
    width = 3
    type_code = ba2int(bits[0:width])
    item.low_quality = LowQualityType(type_code)
    return item, width
def parse_high_quality_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Skip the quality data of a superior item.

    The meaning of the data for superior items is unknown, so ``item`` is
    returned unchanged and 3 bits are reported as consumed.
    """
    skipped_bits = 3
    return item, skipped_bits
def parse_magic_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Read one prefix and one suffix id (11 bits each) of a magic item.

    ``item.prefixes`` and ``item.suffixes`` are each set to a one-element
    list of ``Affix``.

    Returns:
        ``(item, 22)``; 22 is the number of bits consumed.
    """
    prefix_id = ba2int(bits[0:11])
    item.prefixes = [Affix(name_id=prefix_id)]
    suffix_id = ba2int(bits[11:22])
    item.suffixes = [Affix(name_id=suffix_id)]
    return item, 22
def parse_set_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Read the 12-bit set id into ``item.set_id``.

    Returns:
        ``(item, 12)``; 12 is the number of bits consumed.
    """
    width = 12
    item.set_id = ba2int(bits[0:width])
    return item, width
def parse_rare_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Read the name words and affix slots of a rare (or crafted) item.

    Two 8-bit name words come first, followed by six optional affix slots
    parsed with ``parse_affix``. Slots 0-2 are prefixes and slots 3-5 are
    suffixes; empty slots are dropped from the resulting lists.

    Returns:
        ``(item, bits_consumed)``.
    """
    item.nameword1 = ba2int(bits[0:8])
    item.nameword2 = ba2int(bits[8:16])

    cursor = 16
    slots = []
    for _slot in range(6):
        parsed, width = parse_affix(bits[cursor:])
        cursor += width
        slots.append(parsed)

    item.prefixes = [entry for entry in slots[:3] if entry is not None]
    item.suffixes = [entry for entry in slots[3:6] if entry is not None]
    return item, cursor
def parse_unique_data(bits: bitarray, item: Item) -> tuple[Item, int]:
    """Read the 12-bit unique id into ``item.unique_id``.

    Returns:
        ``(item, 12)``; 12 is the number of bits consumed.
    """
    width = 12
    item.unique_id = ba2int(bits[0:width])
    return item, width
def parse_affix(bits: bitarray) -> tuple[Affix | None, int]:
    """Read one optional affix slot.

    The first bit says whether the slot is filled. If it is, the following
    11 bits are the affix name id.

    Returns:
        ``(affix, bits_consumed)``: ``(None, 1)`` for an empty slot,
        otherwise the ``Affix`` and ``12``.
    """
    is_present = bits[0]
    if is_present:
        name_id = ba2int(bits[1:12])
        return Affix(name_id=name_id), 12
    return None, 1
def parse_runeword(bits: bitarray) -> tuple[int, int]:
    """Read the runeword id.

    Only the first 12 bits are decoded as the id, but 16 bits are reported as
    consumed, so the 4 bits after the id are skipped.

    Returns:
        ``(runeword_id, 16)``.
    """
    runeword_id = ba2int(bits[0:12])
    consumed = 16
    return runeword_id, consumed
def parse_personalization(bits: bitarray) -> tuple[str, int]:
    """Read the personalization name: 7-bit characters ended by a zero.

    Each 7-bit group is read exactly once, so the first character is not
    duplicated in the result.

    Returns:
        ``(name, bits_consumed)``. ``bits_consumed`` counts every 7-bit
        group that was read, including the zero terminator, so an empty name
        consumes 7 bits.
    """
    char_bits = 7
    chars: list[str] = []
    ptr = 0
    while True:
        code = ba2int(bits[ptr : ptr + char_bits])
        ptr += char_bits
        if not code:
            # Zero terminator: it has been consumed but is not part of the name.
            break
        chars.append(chr(code))
    return "".join(chars), ptr
def parse_items(data: bytes) -> list[Item]:
    """Parse the item section of a stash tab.

    The section starts with ``ITEM_DATA_MAGIC`` and a u16 item count,
    followed by the items. Items are parsed with ``parse_item`` until no
    input is left; the count is read but not checked yet.

    Returns:
        The parsed items in the order they appear.
    """
    remaining = parse_fixed(data, ITEM_DATA_MAGIC)
    remaining, _declared_count = parse_u16(remaining)
    parsed: list[Item] = []
    while remaining:
        remaining, entry = parse_item(remaining)
        parsed.append(entry)
    # TODO: check if num == len(items)
    return parsed