Skip to content

Commit

Permalink
Implement text compression
Browse files Browse the repository at this point in the history
  • Loading branch information
mdsteele committed Aug 28, 2024
1 parent c67d8f3 commit b9e0f37
Show file tree
Hide file tree
Showing 78 changed files with 3,143 additions and 3,306 deletions.
26 changes: 24 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ MUSIC_OUT_DIR = $(OUTDIR)/music
PCM_OUT_DIR = $(OUTDIR)/pcm
ROOM_OUT_DIR = $(OUTDIR)/rooms
SIM65_OUT_DIR = $(OUTDIR)/sim65
TEXT_OUT_DIR = $(OUTDIR)/text
TILE_OUT_DIR = $(OUTDIR)/tiles
TSET_OUT_DIR = $(OUTDIR)/tilesets

Expand Down Expand Up @@ -67,6 +68,13 @@ ROOM_ROOM_FILES := \
$(patsubst src/rooms/%.bg,$(ROOM_OUT_DIR)/%.room,$(ROOM_BG_FILES))
ROOM_LIB_FILE = $(LIB_OUT_DIR)/rooms.lib

# Dialog text pipeline: each src/text/*.txt source is turned into a generated
# .asm file under $(TEXT_OUT_DIR), then into a .o file next to it, and all of
# the objects are collected into a single library.
TEXT_TXT_FILES := $(shell find src/text -name '*.txt' | sort)
TEXT_ASM_FILES := $(TEXT_TXT_FILES:src/text/%.txt=$(TEXT_OUT_DIR)/%.asm)
TEXT_OBJ_FILES := $(TEXT_ASM_FILES:$(TEXT_OUT_DIR)/%.asm=$(TEXT_OUT_DIR)/%.o)
TEXT_LIB_FILE = $(LIB_OUT_DIR)/text.lib

TILE_AHI_FILES := $(shell find src/tiles -name '*.ahi' | sort)
TILE_CHR_FILES := \
$(patsubst src/tiles/%.ahi,$(TILE_OUT_DIR)/%.chr,$(TILE_AHI_FILES))
Expand Down Expand Up @@ -214,6 +222,12 @@ $(MUSIC_OUT_DIR)/%.asm: src/music/%.sng $(SNG2ASM)
@$(SNG2ASM) < $< > $@
.SECONDARY: $(MUSIC_ASM_FILES)

# Generate one assembly file per text source by running build/text2asm.py.
# The script writes to stdout, so its output is captured in a temporary file
# that is renamed onto $@ only if the script succeeds.  Redirecting straight
# into $@ would leave a truncated-but-newer target behind after a failure, and
# the next build would wrongly treat it as up to date.
$(TEXT_OUT_DIR)/%.asm: src/text/%.txt build/text2asm.py
	@echo "Generating $@"
	@mkdir -p $(@D)
	@python3 build/text2asm.py $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@
.SECONDARY: $(TEXT_ASM_FILES)

$(TSET_OUT_DIR)/%.asm: src/tilesets/%.bg $(BG2TSET) $(TILE_AHI_FILES)
@echo "Generating $@"
@mkdir -p $(@D)
Expand Down Expand Up @@ -317,6 +331,10 @@ $(MUSIC_OUT_DIR)/%.o: $(MUSIC_OUT_DIR)/%.asm $(INC_FILES)
$(compile-asm)
.SECONDARY: $(MUSIC_OBJ_FILES)

# Build each text object from its generated .asm file using the compile-asm
# recipe (defined elsewhere in this Makefile).  $(INC_FILES) are listed as
# prerequisites, so a change to any of them rebuilds every text object.
$(TEXT_OUT_DIR)/%.o: $(TEXT_OUT_DIR)/%.asm $(INC_FILES)
$(compile-asm)
.SECONDARY: $(TEXT_OBJ_FILES)

$(TSET_OUT_DIR)/%.o: $(TSET_OUT_DIR)/%.asm $(INC_FILES)
$(compile-asm)
.SECONDARY: $(TSET_OBJ_FILES)
Expand All @@ -333,6 +351,9 @@ $(MUSIC_LIB_FILE): $(MUSIC_OBJ_FILES)
$(ROOM_LIB_FILE): $(ROOM_OBJ_FILES)
$(update-archive)

# Collect all text objects into the text library using the update-archive
# recipe (defined elsewhere in this Makefile).
$(TEXT_LIB_FILE): $(TEXT_OBJ_FILES)
$(update-archive)

$(TSET_LIB_FILE): $(TSET_OBJ_FILES)
$(update-archive)

Expand All @@ -341,13 +362,14 @@ $(TSET_LIB_FILE): $(TSET_OBJ_FILES)

$(ROM_BIN_FILE) $(ROM_LABEL_FILE): \
tests/lint.py $(ROM_CFG_FILE) $(ROM_OBJ_FILES) \
$(MUSIC_LIB_FILE) $(ROOM_LIB_FILE) $(TSET_LIB_FILE)
$(MUSIC_LIB_FILE) $(ROOM_LIB_FILE) $(TEXT_LIB_FILE) $(TSET_LIB_FILE)
python3 tests/lint.py
@echo "Linking $@"
@mkdir -p $(@D)
@ld65 -Ln $(ROM_LABEL_FILE) -m $(ROM_MAP_FILE) -o $@ \
-C $(ROM_CFG_FILE) $(ROM_OBJ_FILES) \
$(MUSIC_LIB_FILE) $(ROOM_LIB_FILE) $(TSET_LIB_FILE)
$(MUSIC_LIB_FILE) $(ROOM_LIB_FILE) $(TEXT_LIB_FILE) \
$(TSET_LIB_FILE)
$(ROM_LABEL_FILE): $(ROM_BIN_FILE)

#=============================================================================#
Expand Down
212 changes: 212 additions & 0 deletions build/text2asm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#=============================================================================#
# Copyright 2022 Matthew D. Steele <[email protected]> #
# #
# This file is part of Annalog. #
# #
# Annalog is free software: you can redistribute it and/or modify it under #
# the terms of the GNU General Public License as published by the Free #
# Software Foundation, either version 3 of the License, or (at your option) #
# any later version. #
# #
# Annalog is distributed in the hope that it will be useful, but WITHOUT ANY #
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
# details. #
# #
# You should have received a copy of the GNU General Public License along #
# with Annalog. If not, see <http://www.gnu.org/licenses/>. #
#=============================================================================#

import os
import sys

#=============================================================================#

# Maximum number of entries in the pair dictionary.  compress_text() encodes
# dictionary entry i as the single byte 0x80 + i, so this limit keeps every
# pair code below 0xfd.
# NOTE(review): presumably bytes 0xfd and above are reserved for dialog
# control codes -- confirm against src/dialog.inc.
MAX_PAIRS = 0xfd - 0x80

# Boilerplate written at the top of every generated assembly file.
HEADER = """\
;;; This file was generated by text2asm.
.INCLUDE "../../src/charmap.inc"
.INCLUDE "../../src/dialog.inc"
;;;=========================================================================;;;
"""

# Boilerplate written at the bottom of every generated assembly file.
FOOTER = """\
;;;=========================================================================;;;
"""

#=============================================================================#

def parse_text(data):
    """Splits the raw data for one text entry into a list of tokens.

    Each token is a (kind, value) tuple of one of three kinds:
      * ('b', string): a run of ordinary characters.
      * ('c', name): the contents of a `{name}` group.
      * ('p', (first, second)): a `[XXYY]` group, where XX and YY are two
        hexadecimal byte values.

    Returns (tokens, original_size), where original_size counts one byte per
    ordinary character, one byte per `{...}` group, and two bytes per `[...]`
    group.

    Raises ValueError if a `{` or `[` group is never closed, or if a `[...]`
    group does not contain exactly four hexadecimal digits.
    """
    text = []
    original_size = 0
    pos = 0
    end = len(data)
    while pos < end:
        opener = data[pos]
        if opener == '{' or opener == '[':
            closer = '}' if opener == '{' else ']'
            close_pos = data.find(closer, pos + 1)
            # Without this check, a missing closer (find() == -1) would
            # never advance past the opener and the loop would not end.
            if close_pos < 0:
                raise ValueError(
                    f'unterminated {opener!r} in text: ' + repr(data[pos:]))
            body = data[pos + 1:close_pos]
            if opener == '{':
                text.append(('c', body))
                original_size += 1
            else:
                if len(body) != 4:
                    raise ValueError('bad byte pair: ' + repr(body))
                # int() raises ValueError for non-hexadecimal digits.
                text.append(('p', (int(body[:2], 16), int(body[2:], 16))))
                original_size += 2
            pos = close_pos + 1
        else:
            # Consume the whole run of ordinary characters up to the next
            # group opener (or the end of the data) as a single token.
            run_end = pos + 1
            while run_end < end and data[run_end] not in '{[':
                run_end += 1
            text.append(('b', data[pos:run_end]))
            original_size += run_end - pos
            pos = run_end
    return (text, original_size)

def parse_input_file(filepath):
    """Reads a text source file and parses every text entry in it.

    Outside of an entry, blank lines and lines starting with '#' are skipped,
    and a line of the form `@Name` starts a new entry called Name; any other
    line is an error.  Inside an entry, lines are collected until one ends
    with '#' or '%' (that line is included); the collected lines are joined
    with '$' and handed to parse_text().

    Returns (texts, original_size), where texts maps each entry name to its
    token list and original_size is the sum of the sizes reported by
    parse_text().

    Raises ValueError for an unexpected line outside of an entry, for a
    duplicate entry name, or if the file ends in the middle of an entry.
    """
    texts = {}
    original_size = 0
    current_text_name = None
    current_lines = []
    # Use a context manager so the file is closed even if parsing fails.
    with open(filepath) as text_file:
        for line in text_file:
            line = line.rstrip('\n')
            if current_text_name is None:
                if not line or line.startswith('#'):
                    continue
                if not line.startswith('@'):
                    raise ValueError('bad line: ' + repr(line))
                name = line[1:]
                # A repeated name would silently replace the earlier text
                # while still counting its size, so reject it outright.
                if name in texts:
                    raise ValueError('duplicate text name: ' + repr(name))
                current_text_name = name
                continue
            current_lines.append(line)
            if line.endswith('#') or line.endswith('%'):
                (text, size) = parse_text('$'.join(current_lines))
                texts[current_text_name] = text
                original_size += size
                current_text_name = None
                current_lines = []
    if current_text_name is not None:
        raise ValueError('unterminated text: ' + repr(current_text_name))
    return (texts, original_size)

def compute_pairs(texts, max_pairs=None):
    """Chooses the dictionary of two-byte pairs used to compress the texts.

    The dictionary starts with every pair that appears as a ('p', ...) token
    (these must be present, since compress_text() looks them up directly), in
    sorted order.  The remaining slots are filled with the two-character
    sequences that occur most often in the ('b', ...) tokens, most frequent
    first; sequences that occur two times or fewer are never included.

    Args:
      texts: dict mapping text names to token lists from parse_text().
      max_pairs: maximum dictionary size; defaults to MAX_PAIRS.

    Returns the dictionary as a list of pairs (tuples for forced pairs,
    two-character strings for the rest).

    Raises ValueError if there are more forced pairs than max_pairs.
    """
    if max_pairs is None:
        max_pairs = MAX_PAIRS
    forced_pairs = set()
    pair_counts = {}
    for text in texts.values():
        for kind, value in text:
            if kind == 'p':
                forced_pairs.add(value)
            elif kind == 'b':
                for i in range(len(value) - 1):
                    pair = value[i:i + 2]
                    pair_counts[pair] = pair_counts.get(pair, 0) + 1
    # A negative slice bound below would keep nearly all of best_pairs
    # instead of none of them, so reject this case explicitly.
    if len(forced_pairs) > max_pairs:
        raise ValueError(
            f'too many forced pairs: {len(forced_pairs)} > {max_pairs}')
    # The sort is stable, so equally-common pairs keep first-seen order.
    sorted_counts = sorted(pair_counts.items(), key=lambda item: -item[1])
    best_pairs = [pair for pair, count in sorted_counts if count > 2]
    return sorted(forced_pairs) + best_pairs[:max_pairs - len(forced_pairs)]

def compress_text(text, dictionary):
    """Renders one parsed text as `.byte` assembly lines, using the dictionary.

    Args:
      text: token list as produced by parse_text().
      dictionary: dict mapping each pair to its index in the pair table.

    Within ('b', ...) tokens, each two-character sequence found in the
    dictionary (scanning left to right) is replaced by the one-byte code
    0x80 + index; all other characters are emitted inside quoted strings.
    ('c', ...) tokens are emitted verbatim as operands, and ('p', ...) tokens
    are emitted as the code for their (required) dictionary entry.  A new
    output line is started after every '$' character.

    Returns (asm, compressed_size), where asm is the generated assembly text
    and compressed_size is the number of bytes it encodes.
    """
    byte_count = 0
    asm_lines = []
    operands = []  # operands of the .byte line currently being built
    literal = []   # characters of the quoted string currently being built

    def flush_literal():
        if literal:
            operands.append('"{}"'.format(''.join(literal)))
            literal.clear()

    def flush_line():
        flush_literal()
        if operands:
            asm_lines.append(' .byte {}\n'.format(', '.join(operands)))
            operands.clear()

    def emit_code(index):
        flush_literal()
        operands.append('${:02x}'.format(index + 0x80))

    for kind, value in text:
        if kind == 'b':
            pos = 0
            end = len(value)
            while end - pos >= 2:
                pair = value[pos:pos + 2]
                index = dictionary.get(pair)
                if index is None:
                    char = pair[0]
                    pos += 1
                    literal.append(char)
                    byte_count += 1
                    if char == '$':
                        flush_line()
                else:
                    pos += 2
                    emit_code(index)
                    byte_count += 1
                    if '$' in pair:
                        flush_line()
            # At most one character is left over at this point.
            tail = value[pos:]
            if tail:
                literal.append(tail)
                byte_count += len(tail)
                if tail.endswith('$'):
                    flush_line()
        elif kind == 'c':
            flush_literal()
            operands.append(value)
            byte_count += 1
        elif kind == 'p':
            emit_code(dictionary[value])
            byte_count += 1
        else:
            assert False
    flush_line()
    return (''.join(asm_lines), byte_count)

def write_output_file(bank, pairs, texts, original_data_size):
    """Writes the generated assembly for one text bank to stdout.

    The output consists of HEADER, a `PRGA_{bank}` segment directive, the
    pair table `DataA_{bank}_Strings_u8_arr2_arr` (two bytes per entry), one
    exported `DataA_{bank}_{name}_u8_arr` block per text in name order, a
    comment block of size statistics, and FOOTER.

    Args:
      bank: bank name used in the segment and symbol names.
      pairs: dictionary entries from compute_pairs(), in table order.
      texts: dict mapping text names to token lists from parse_text().
      original_data_size: uncompressed size of all texts, in bytes.
    """
    out = sys.stdout
    compressed_data_size = 0
    out.write(HEADER)
    out.write(f'.SEGMENT "PRGA_{bank}"\n\n')
    out.write(f'.EXPORT DataA_{bank}_Strings_u8_arr2_arr\n')
    out.write(f'.PROC DataA_{bank}_Strings_u8_arr2_arr\n')
    for pair in pairs:
        # Character pairs are strings; forced pairs are tuples of two ints.
        if isinstance(pair, str):
            out.write(f' .byte "{pair}"\n')
        else:
            out.write(f' .byte ${pair[0]:02x}, ${pair[1]:02x}\n')
        compressed_data_size += 2
    out.write('.ENDPROC\n')
    dictionary = {pair: i for i, pair in enumerate(pairs)}
    for name, text in sorted(texts.items()):
        out.write(f'\n.EXPORT DataA_{bank}_{name}_u8_arr\n')
        out.write(f'.PROC DataA_{bank}_{name}_u8_arr\n')
        (compressed_asm, compressed_size) = compress_text(text, dictionary)
        out.write(compressed_asm)
        out.write('.ENDPROC\n')
        compressed_data_size += compressed_size
    out.write(f'\n;;; Original size = {original_data_size:4x}\n')
    out.write(f';;; Compressed size = {compressed_data_size:4x}\n')
    saved = original_data_size - compressed_data_size
    out.write(f';;; Bytes saved = {saved:4x}\n')
    # An input file with no text entries has an original size of zero; report
    # 0% rather than dividing by zero partway through writing the output.
    if original_data_size:
        percent = int(round(
            100 * (1 - compressed_data_size / original_data_size)))
    else:
        percent = 0
    out.write(f';;; Percent saved = {percent:3d}%\n')
    out.write(FOOTER)

def run(filepath):
    """Converts the text source file at filepath into assembly on stdout.

    The bank name is the file's base name without its extension, with the
    first letter upper-cased and the rest lower-cased.
    """
    stem, _ = os.path.splitext(os.path.basename(filepath))
    bank = stem.capitalize()
    texts, original_size = parse_input_file(filepath)
    write_output_file(bank, compute_pairs(texts), texts, original_size)

#=============================================================================#

if __name__ == '__main__':
    # The input path is required; report a usage error (on stderr, with a
    # nonzero exit status) instead of an IndexError traceback if it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: text2asm.py INPUT.txt > OUTPUT.asm')
    run(sys.argv[1])

#=============================================================================#
19 changes: 1 addition & 18 deletions src/dialog.asm
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@
.IMPORT FuncA_Dialog_PlaySfxDialogText
.IMPORT FuncA_Dialog_PlaySfxQuestMarker
.IMPORT FuncA_Objects_DrawObjectsForRoom
.IMPORT FuncM_CopyDialogText
.IMPORT FuncM_DrawObjectsForRoomAndProcessFrame
.IMPORT FuncM_ScrollTowardsAvatar
.IMPORT FuncM_ScrollTowardsGoal
Expand Down Expand Up @@ -422,24 +423,6 @@ _Finish:
jmp_prga MainA_Pause_Papers
.ENDPROC

;;; Given the bank/pointer returned by FuncA_Dialog_GetNextDialogTextPointer,
;;; switches the PRGA bank and copies the dialog text into
;;; Ram_DialogText_u8_arr.
;;; @param T2 The PRGA bank that contains the dialog text.
;;; @param T1T0 A pointer to the start of the dialog text.
.EXPORT FuncM_CopyDialogText
.PROC FuncM_CopyDialogText
main_prga T2
ldy #$ff  ; the iny at the top of the loop wraps this to 0 for the first byte
@loop:
iny
lda (T1T0), y
sta Ram_DialogText_u8_arr, y
;; Keep copying while the byte just stored compares below
;; kDialogTextNewline + 1; the first byte that does not is still stored
;; before the loop exits.
;; NOTE(review): blt is presumably an unsigned branch-if-less-than macro
;; defined elsewhere -- confirm.
cmp #kDialogTextNewline + 1
blt @loop
rts
.ENDPROC

;;;=========================================================================;;;

.SEGMENT "PRGA_Dialog"
Expand Down
29 changes: 14 additions & 15 deletions src/linker.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -250,21 +250,20 @@ SEGMENTS {
PRGA_Text0: load=PRGA_01, type=ro;
PRGA_Text1: load=PRGA_03, type=ro;
PRGA_Text2: load=PRGA_05, type=ro;
PRGA_Text3: load=PRGA_07, type=ro;
PRGA_Actor: load=PRGA_09, type=ro;
PRGA_Avatar: load=PRGA_0B, type=ro;
PRGA_Console: load=PRGA_0D, type=ro;
PRGA_Cutscene: load=PRGA_0F, type=ro;
PRGA_Death: load=PRGA_11, type=ro;
PRGA_Dialog: load=PRGA_13, type=ro;
PRGA_Machine: load=PRGA_15, type=ro;
PRGA_Objects: load=PRGA_17, type=ro;
PRGA_Pause: load=PRGA_19, type=ro;
PRGA_Room: load=PRGA_1B, type=ro;
PRGA_Terrain: load=PRGA_1D, type=ro;
PRGA_Pcm0: load=PRGA_1F, type=ro;
PRGA_Pcm1: load=PRGA_21, type=ro;
PRGA_Pcm2: load=PRGA_23, type=ro;
PRGA_Actor: load=PRGA_07, type=ro;
PRGA_Avatar: load=PRGA_09, type=ro;
PRGA_Console: load=PRGA_0B, type=ro;
PRGA_Cutscene: load=PRGA_0D, type=ro;
PRGA_Death: load=PRGA_0F, type=ro;
PRGA_Dialog: load=PRGA_11, type=ro;
PRGA_Machine: load=PRGA_13, type=ro;
PRGA_Objects: load=PRGA_15, type=ro;
PRGA_Pause: load=PRGA_17, type=ro;
PRGA_Room: load=PRGA_19, type=ro;
PRGA_Terrain: load=PRGA_1B, type=ro;
PRGA_Pcm0: load=PRGA_1D, type=ro;
PRGA_Pcm1: load=PRGA_1F, type=ro;
PRGA_Pcm2: load=PRGA_21, type=ro;
# Fixed-bank PRG segments:
PRG8: load=PRG8, type=ro;
PRGE_Pcm: load=PRGE, type=ro, align=$100;
Expand Down
Loading

0 comments on commit b9e0f37

Please sign in to comment.