git.bytex64.net -- asm89

commit:	ce924fccf9c16684e5da8352de4796c2dbd14a25
author:	Chip
committer:	Chip
date:	Wed Apr 9 20:59:17 2025 -0500
parents:	72323dc20bff4923f8bec9faa8bf9c0b26ff8a1e

Most opcodes implemented. Produces usable objects.

diff --git a/Makefile.fp b/Makefile.fp
line changes: +3/-2
index 30dc404..cbc30b5
--- a/Makefile.fp
+++ b/Makefile.fp

@@ -1,6 +1,7 @@
-fpc = fpc -Cr -g
+fpc = fpc -Cr -O2 -g -Fuutil
 
 all: asm89
 
-asm89: asm89.pas omf.pas opcode.pas compiler.pas parse.pas util.pas
+
+asm89: asm89.pas compiler.pas omf.pas opcode.pas parse.pas util/symtable.pas util/util.pas
 	$(fpc) $<

diff --git a/asm89.pas b/asm89.pas
line changes: +13/-10
index 244ad96..e7ecf3c
--- a/asm89.pas
+++ b/asm89.pas

@@ -1,5 +1,5 @@
 program Asm89;
-uses Compiler, Omf, Util;
+uses Compiler, Omf, Util, SymTable;
 
 const VERSION: string = '0.1';

@@ -10,7 +10,7 @@ var infile: string;
 procedure WriteObjectFile(path: string; obj: CompiledObject);
 var rec: OmfRecord;
     f: OmfFile;
-    i: Integer;
+    se: PSymbolEntry;
     v: Word;
 begin
     OpenOmf(path, f);

@@ -31,12 +31,12 @@ begin
                           the overlay index. }
     RecAddName(rec, 'DATA');
     RecAddName(rec, 'DGROUP');
-    RecAddName(rec, 'IOP');
+    RecAddName(rec, obj.segment_name);
     WriteRecord(f, rec);
 
     NewRecord(rec, SEGDEF);
     { alignment, combine, segment length 5, name index, class index, overlay index }
-    RecAddRelocatableSegment(rec, AlignParagraph, CombinePublic, 5, 4, 2, 1);
+    RecAddRelocatableSegment(rec, AlignParagraph, CombinePublic, obj.buffer.data_len, 4, 2, 1);
     WriteRecord(f, rec);
 
     NewRecord(rec, GRPDEF);

@@ -49,15 +49,16 @@ begin
     WriteRecord(f, rec);
 
     NewRecord(rec, PUBDEF);
-    for i := 0 to 1023 do begin
-        if Length(obj.public_symbols[i]) = 0 then
-            Break;
-        if NOT FindSymbol(obj.public_symbols[i], v) then begin
-            Writeln('Public symbol `', obj.public_symbols[i], '` not defined');
+    RecAddPubDef(rec, 1, 1);
+    se := obj.public_symbols.head;
+    while se <> nil do begin
+        if NOT FindSymbol(obj.symbols, se^.name, v) then begin
+            Writeln('Public symbol `', se^.name, '` not defined');
             Halt(1);
         end;
         { group index, segment index, name, offset, type }
-        RecAddPubDef(rec, 1, 1, obj.public_symbols[i], v, 0);
+        RecAddPubDefName(rec, se^.name, v, 0);
+        se := se^.next;
     end;
     WriteRecord(f, rec);

@@ -65,6 +66,7 @@ begin
     RecAddByte(rec, 0);
     WriteRecord(f, rec);
 
+    Writeln('''', outfile, ''' written, ', FileSize(f.f), ' bytes');
     CloseOmf(f);
 end;

@@ -99,5 +101,6 @@ begin
     Writeln('Assembling ''', infile, ''' to ''', outfile, '''');
 
     Compile(infile, cob);
+    Writeln('Assembly complete');
     WriteObjectFile(outfile, cob);
 end.

diff --git a/compiler.pas b/compiler.pas
line changes: +168/-129
index b060c50..5e77a8a
--- a/compiler.pas
+++ b/compiler.pas

@@ -1,17 +1,13 @@
 unit Compiler;
 interface
 
-uses Opcode;
+uses Opcode, SymTable;
 
 const COMPILE_BUFFER_SIZE: Word = 65535;
 
 type
 SegArray = array[0..32767] of Byte;
 PSegArray = ^SegArray;
-Symbol = record
-    name: string[32];
-    value: Word;
-end;
 CompileBuffer = object
     data: PSegArray;
     data_len: Word;

@@ -26,12 +22,14 @@ end;
 CompiledObject = record
     buffer: CompileBuffer;
     name: string;
-    symbols: array[0..1023] of Symbol;
-    public_symbols: array[0..255] of String;
+    segment_name: string;
+    symbols: SymbolTable;
+    public_symbols: SymbolTable;
 end;
+OpArray = array[1..3] of Operand;
+
+var currentObj: ^CompiledObject;
 
-function FindSymbol(const n: String; var value: Word): Boolean;
-procedure DumpSymbols;
 procedure Compile(const filename: string; var obj: CompiledObject);
 
 implementation

@@ -54,7 +52,9 @@ Parts = record
 end;
 DataDefType = (DataDB, DataDW, DataDD);
 
-var currentObj: ^CompiledObject;
+var
+    currentLine: Integer;
+    linebuf: string;
 
 procedure PartsClear(var pts: Parts);
 begin

@@ -86,13 +86,18 @@ begin
     end;
 end;
 
-procedure DumpSymbols;
-var i: Integer;
+procedure ErrorAtLine(msg: string);
 begin
-    for i := 0 to 1023 do begin
-        if currentObj^.symbols[i].name <> '' then
-            Writeln(currentObj^.symbols[i].name, ': ', currentObj^.symbols[i].value);
-    end;
+    Writeln();
+    Writeln('ERR: ', msg,' on line ', currentLine, ':');
+    Writeln(linebuf);
+end;
+
+procedure WarnAtLine(msg: string);
+begin
+    Writeln();
+    Writeln('WARN: ', msg,' on line ', currentLine, ':');
+    Writeln(linebuf);
 end;
 
 constructor CompileBuffer.Init;

@@ -211,54 +216,16 @@ begin
     Trim(pts.op3);
 end;
 
-procedure DefineSymbol(const n: String; const value: Word);
-var i: Integer;
-begin
-    Writeln('Define symbol ', ToUpCase(n), ': ', value);
-    for i := 0 to 1023 do begin
-        if currentObj^.symbols[i].name = '' then begin
-            currentObj^.symbols[i].name := ToUpCase(n);
-            currentObj^.symbols[i].value := value;
-            Exit;
-        end;
-    end;
-    Writeln('Too many symbols (> 1024): ', n);
-end;
-
-function FindSymbol(const n: String; var value: Word): Boolean;
-var i: Integer;
-begin
-    for i := 0 to 1023 do begin
-        if currentObj^.symbols[i].name = n then begin
-            FindSymbol := true;
-            value := currentObj^.symbols[i].value;
-            Exit;
-        end;
-    end;
-    FindSymbol := false;
-end;
-
 procedure AddPublic(const s: String);
 var i: Byte;
-    j: Word;
     n: String[32];
 begin
-    for j := 0 to 1023 do begin
-        if Length(currentObj^.public_symbols[j]) = 0 then
-            Break;
-    end;
-    if j = 1024 then begin
-        Writeln('Too many public symbols');
-        Halt(1);
-    end;
     i := 1;
     repeat
         n := ConsumeUntilComma(s, i);
         if Length(n) > 0 then begin
-            currentObj^.public_symbols[j] := ToUpCase(Trim(n));
-            Inc(j);
-            Inc(i);
-            ConsumeWhitespace(s, i);
+            DefineSymbol(currentObj^.public_symbols, ToUpCase(Trim(n)), 0);
+            ConsumeWhitespaceAndCommas(s, i);
         end;
     until Length(n) = 0;
 end;

@@ -316,93 +283,148 @@ begin
     until Length(def) = 0;
 end;
 
-procedure CompileInstruction(const pts: Parts);
-var i, j: Integer;
-    instr: array[0..5] of Byte;
-    instr_len: Byte;
-    m: Mnemonic;
-    op: array[1..3] of Operand;
-    ie: ^InstructionEncoding;
-    rbp: Byte;
-    ienc_found: Boolean;
+procedure FindInstruction(const pts: Parts; var m: Mnemonic; var op: OpArray; var ie: PInstructionEncoding);
+var i, lo_enc, hi_enc: Integer;
 begin
-    instr_len := 2;
     m := ParseMnemonic(pts.mnemonic);
-    GetMnemonicEncodingBounds(m, i, j);
-    if i = High(IEnc) then begin
-        Writeln('Instruction not supported: ', m);
-        Halt(2);
+    if m = mInvalid then begin
+        ErrorAtLine('Invalid mnemonic: ' + pts.mnemonic);
+        Halt(1);
+    end;
+    GetMnemonicEncodingBounds(m, lo_enc, hi_enc);
+    if lo_enc = High(IEnc) then begin
+        ErrorAtLine('Internal error: mnemonic not found');
+        Halt(128);
     end;
     { iterate over possible encodings }
-    ienc_found := false;
-    for i := i to j do begin
+    for i := lo_enc to hi_enc do begin
         op[1].Kind := OpAbsent;
         op[2].Kind := OpAbsent;
         op[3].Kind := OpAbsent;
         ie := @IEnc[i];
-        Writeln('  ie ', i, ': ', m, ' ', ie^.o1, ' ', ie^.o2, ' ', ie^.o3);
+        { Writeln('  ie ', i, ': ', m, ' ', ie^.o1, ' ', ie^.o2, ' ', ie^.o3); }
         if (ie^.o1 <> OpAbsent) and not ParseOperand(pts.op1, op[1], ie^.o1) then
             Continue;
         if (ie^.o2 <> OpAbsent) and not ParseOperand(pts.op2, op[2], ie^.o2) then
             Continue;
         if (ie^.o3 <> OpAbsent) and not ParseOperand(pts.op3, op[3], ie^.o3) then
             Continue;
-        ienc_found := true;
-        Break;
+        Exit;
     end;
-    if NOT ienc_found then begin
-        Writeln('Invalid operands for ', m);
-        Writeln('Wanted ', ie^.o1, ' ', ie^.o2, ' ', ie^.o3);
-        Writeln('Got ', op[1].Kind, ' ', op[2].Kind, ' ', op[3].Kind);
-        Halt(1);
+    { We didn't find anything }
+    ErrorAtLine('Invalid operands for ' + MnemonicStr(m));
+    Writeln('Possible valid options:');
+    for i := lo_enc to hi_enc do begin
+        Writeln('    ', MnemonicStr(m), ' ', ShortOperandStr(IEnc[i].o1),
+                                        ' ', ShortOperandStr(IEnc[i].o2),
+                                        ' ', ShortOperandStr(IEnc[i].o3));
     end;
+    Halt(1);
+end;
+
+procedure CompileInstruction(const pts: Parts);
+var i: Integer;
+    instr: array[0..5] of Byte;
+    instr_len: Byte;
+    m: Mnemonic;
+    op: OpArray;
+    ie: PInstructionEncoding;
+    rbp: Byte;
+    disp: Integer;
+begin
+    instr_len := 2;
+    if pts.mnemonic = '' then
+        Exit;
+    FindInstruction(pts, m, op, ie);
     instr[1] := Byte(ie^.op);
-    instr[0] := (Byte(ie^.wb) SHL 3) OR Byte(ie^.w);
-    for i := 1 to 3 do begin
-        case op[i].Kind of
-            OpMemory: begin
-                if (m = mMOV) AND (i = 2) then begin
-                    Writeln('mem-mem MOV not yet implemented');
-                    Halt(2);
-                end else begin
+    instr[0] := Byte(ie^.w);
+    if ie^.wb <> wLocation then
+        instr[0] := instr[0] OR (Byte(ie^.wb) SHL 3);
+    if ((m = mMOV) or (m = mMOVB)) AND (ie^.o1 = OpMemory) AND (ie^.o2 = OpMemory) then begin
+        { mem-mem MOV is a special case that emits two instructions back-to-back. Note that the
+          source (the second operand) is first. }
+        instr[1] := instr[1] OR Byte(op[2].mm);
+        instr[0] := instr[0] OR (Byte(op[2].aa) SHL 1);
+        if op[2].aa = BaseOffset then begin
+            instr[2] := Lo(op[2].offset);
+            Inc(instr_len);
+        end;
+        currentObj^.buffer.Add(@instr, instr_len);
+        instr_len := 2;
+        instr[1] := $CC OR Byte(op[1].mm);
+        instr[0] := Byte(ie^.w) OR (Byte(op[1].aa) SHL 1);
+        if op[1].aa = BaseOffset then begin
+            instr[2] := Lo(op[1].offset);
+            Inc(instr_len);
+        end;
+        currentObj^.buffer.Add(@instr, instr_len);
+        Exit;
+    end
+    else begin
+        for i := 1 to 3 do begin
+            case op[i].Kind of
+                OpMemory: begin
                     instr[1] := instr[1] OR Byte(op[i].mm);
                     instr[0] := instr[0] OR (Byte(op[i].aa) SHL 1);
                     if op[i].aa = BaseOffset then begin
                         instr[2] := Lo(op[i].offset);
-                        instr_len := 3;
+                        Inc(instr_len);
                     end;
-                    if m <> mMOV then
-                        Break;
                 end;
-            end;
-            OpRegister:
-                rbp := Byte(op[i].reg);
-            OpPointer:
-                rbp := Byte(op[i].preg);
-            OpImmediate: begin
-                case ie^.wb of
-                    wOne: begin
-                        instr[instr_len - 1] := Lo(op[i].immed);
-                        instr_len := instr_len + 1;
-                    end;
-                    wTwo: begin
-                        instr[instr_len - 1] := Lo(op[i].immed);
-                        instr[instr_len] := Hi(op[i].immed);
-                        instr_len := instr_len + 2;
+                OpRegister:
+                    rbp := Byte(op[i].reg);
+                OpPointer:
+                    rbp := Byte(op[i].preg);
+                OpImmediate: begin
+                    case ie^.wb of
+                        wOne: begin
+                            if op[i].immed > 255 then
+                                WarnAtLine('byte immediate is > 255');
+                            instr[instr_len] := Lo(op[i].immed);
+                            Inc(instr_len);
+                        end;
+                        wTwo: begin
+                            instr[instr_len] := Lo(op[i].immed);
+                            Inc(instr_len);
+                            instr[instr_len] := Hi(op[i].immed);
+                            Inc(instr_len);
+                        end;
                     end;
                 end;
-            end;
-            OpLocation: begin
-                instr[instr_len - 1] := Lo(op[i].loc);
-                if op[i].loc <= 255 then
-                    instr_len := instr_len + 1
-                else begin
-                    instr[instr_len] := Hi(op[i].loc);
-                    instr_len := instr_len + 2;
+                OpLocation: begin
+                    { The displacement is added to TP at the time of execution, which annoyingly
+                      is already advanced to the instruction after the jump. So where we calculate
+                      from depends on whether it's a one-byte or two-byte displacement, and we
+                      don't know whether we need a two-byte displacement until we calculate the
+                      displacement. :|
+
+                      I *think* this errs on the side of making a two-byte displacement for a case
+                      that would only need one, and not the other way around. But the proof will be
+                      in the testing. }
+                    disp := Integer(op[i].loc) - Integer(currentObj^.buffer.data_len + instr_len + 1);
+                    if (ie^.wb = wTwo) OR (disp > 128) OR (disp < -127) then begin
+                        Dec(disp); { displacement origin moves one byte forward with two-byte displacement }
+                        instr[instr_len] := Lo(Word(disp));
+                        Inc(instr_len);
+                        instr[instr_len] := Hi(Word(disp));
+                        Inc(instr_len);
+                        { Not totally sure if it's necessary to also adjust W, but here it is }
+                        instr[0] := instr[0] OR (Byte(wTwo) SHL 3) OR 1;
+                    end
+                    else begin
+                        instr[instr_len] := Byte(Shortint(disp));
+                        Inc(instr_len);
+                        instr[0] := instr[0] OR (Byte(wOne) SHL 3);
+                    end;
                 end;
+                OpBit:
+                    rbp := Byte(op[i].bit);
+                OpWidth:
+                    if op[i].width = widByte then
+                        rbp := 0
+                    else
+                        rbp := 1;
             end;
-            OpBit:
-                rbp := Byte(op[i].bit);
         end;
     end;
     case ie^.rbp of

@@ -411,13 +433,17 @@ begin
         rbpHlt: instr[0] := instr[0] OR $20;
         rbpSintr: instr[0] := instr[0] OR $40;
         rbpJmp: instr[0] := instr[0] OR $80;
+        rbpWid: instr[0] := instr[0] OR (1 SHL 7)
+                                     OR (Byte(op[1].width) SHL 6)
+                                     OR (Byte(op[2].width) SHL 5);
+        rbpXfer: instr[0] := instr[0] OR $60;
     end;
     currentObj^.buffer.Add(@instr, instr_len);
-    Write('mnemonic: ', m, '; op1: ');
+    { Write('mnemonic: ', m, '; op1: ');
     DumpOperand(op[1]);
     Write('; op2: ');
     DumpOperand(op[2]);
-    Writeln('');
+    Writeln(''); }
 end;
 
 procedure CompileDirective(const pts: Parts);

@@ -426,7 +452,7 @@ begin
     if pts.mnemonic = 'EQU' then begin
         if not ParseOperand(pts.args, op, OpImmediate) then
             DieBadOperand(pts.args);
-        DefineSymbol(pts.label_or_name, op.immed);
+        DefineSymbol(currentObj^.symbols, pts.label_or_name, op.immed);
     end
     else if pts.mnemonic = 'DB' then begin
         DefineData(pts.args, DataDB);

@@ -446,47 +472,60 @@ begin
     else if pts.mnemonic = 'NAME' then begin
         currentObj^.name := pts.op1;
     end
-    else if pts.mnemonic = 'PUBLIC' then begin
-        AddPublic(pts.args);
-    end
     else if pts.mnemonic = 'SEGMENT' then begin
-        Writeln('Segment ', pts.label_or_name, ' begins');
+        if currentObj^.segment_name = '' then
+            currentObj^.segment_name := pts.label_or_name
+        else
+            DieBadOperand('Segment has already been defined');
     end
     else if pts.mnemonic = 'ENDS' then begin
-        Writeln('Segment ', pts.label_or_name, ' ends');
+        if pts.label_or_name <> currentObj^.segment_name then
+            DieBadOperand('Segment end name is not the same as the beginning');
+        { TODO: make sure no instructions appear outside of the segment bounds }
+    end
+    else if pts.mnemonic = 'PUBLIC' then begin
+        AddPublic(pts.args);
     end
+    else if pts.mnemonic = 'EXTRN' then begin
+        Writeln('EXTRN not yet implemented');
+        Halt(1);
+    end
+    else if pts.mnemonic = 'END' then begin
+        { Not sure what to do with this }
+    end;
 end;
 
 procedure Compile(const filename: string; var obj: CompiledObject);
 var f: Text;
-    linebuf: string;
     pts: Parts;
 begin
     currentObj := @obj;  { poor man's `this` pointer }
     obj.buffer.Init;
     Assign(f, filename);
     Reset(f);
+    currentLine := 0;
     while not Eof(f) do begin
         Readln(f, linebuf);
-        Writeln('*** ', linebuf);
+        Inc(currentLine);
+        { Writeln('*** ', linebuf); }
         StripComments(linebuf);
         if Length(linebuf) = 0 then { nothing left on the line }
             Continue;
         PartsClear(pts);
         PickParts(linebuf, pts);
-        PartsDump(pts);
+        { PartsDump(pts); }
 
         if (pts.label_or_name <> '') and
             not ((pts.mnemonic = 'EQU') or (pts.mnemonic = 'STRUC') or
                  (pts.mnemonic = 'SEGMENT') or (pts.mnemonic = 'ENDS')) then
-            DefineSymbol(pts.label_or_name, obj.buffer.data_len);
+            DefineSymbol(obj.symbols, pts.label_or_name, obj.buffer.data_len);
 
         case pts.Kind of
             partsDirective: CompileDirective(pts);
             partsInstruction: CompileInstruction(pts);
         end;
     end;
-    DumpSymbols;
+    { DumpSymbols; }
 end;
 
 end.

diff --git a/examples/badopcode.asm b/examples/badopcode.asm
line changes: +4/-0
index 0000000..7c571d8
--- /dev/null
+++ b/examples/badopcode.asm

@@ -0,0 +1,4 @@
+IOP	SEGMENT
+	; The 8089 has no register-register move
+	mov ga, gb
+IOP	ENDS

diff --git a/examples/data.asm b/examples/data.asm
line changes: +3/-3
index 1b5f6df..2a18ffe
--- a/examples/data.asm
+++ b/examples/data.asm

@@ -1,10 +1,10 @@
 	name data
 	public _test_data
 
-data	SEGMENT
+IOP	SEGMENT
 _test_data:
-	db 1, 20h, 'hello'
+	db 1, 20h, 'hello', 0
 	dw AA55h, 'X', 'AB'
 	dd 42
 	ds 8
-data	ENDS
+IOP	ENDS

diff --git a/examples/scrfuzz.asm b/examples/scrfuzz.asm
line changes: +0/-10
index 00ee022..0000000
--- a/examples/scrfuzz.asm
+++ /dev/null

@@ -1,10 +0,0 @@
-	name ScreenFuzz
-	public _ScreenFuzz
-
-; increments screen memory in an infinite loop
-_ScreenFuzz:
-	lpd ga, [pp].4  ; pp.4 is the input buffer address
-	movi ix, 0
-loop:	inc [ga+ix+]
-	andi ix, 07ffh
-	jmp loop

diff --git a/examples/scrfuzz/Makefile b/examples/scrfuzz/Makefile
line changes: +5/-0
index 0000000..7d143c6
--- /dev/null
+++ b/examples/scrfuzz/Makefile

@@ -0,0 +1,5 @@
+main.exe: main.obj scrfuzz.obj
+	tlink /s main.obj+scrfuzz.obj+C:\TC\LIB\C0C.OBJ,main.exe,main.map,C:\TC\LIB\CC.LIB
+
+main.obj: main.c
+	tcc -c -mc main.c

diff --git a/examples/scrfuzz/main.c b/examples/scrfuzz/main.c
line changes: +37/-0
index 0000000..f1f479f
--- /dev/null
+++ b/examples/scrfuzz/main.c

@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <dos.h>
+
+extern unsigned char screen_fuzz;
+
+struct i89_channel_parameter_block {
+	void far * tbp;
+};
+
+struct i89_channel {
+	unsigned char ccw;
+	unsigned char busy;
+	struct i89_channel_parameter_block far * ppb;
+	unsigned int reserved;
+};
+
+struct screenfuzz_parameter_block {
+	struct i89_channel_parameter_block cpb;
+	unsigned int far * screen_mem;
+};
+
+struct screenfuzz_parameter_block fuzz_cpb = {
+	{ &screen_fuzz },
+	0,
+};
+
+int main(int argc, char* argv) {
+	struct i89_channel far * ccb = (struct i89_channel *) MK_FP(0x0000, 0x0500);
+	printf("channel 1 busy %02x\n", ccb[0].busy);
+	printf("channel 2 busy %02x\n", ccb[1].busy);
+	ccb[1].ccw = 0x23;
+	ccb[1].ppb = &(fuzz_cpb.cpb);
+	fuzz_cpb.screen_mem = (unsigned int far *) MK_FP(0xF000,0x0000);
+	outportb(0x72, 0);
+	printf("channel 1 busy %02x\n", ccb[0].busy);
+	printf("channel 2 busy %02x\n", ccb[1].busy);
+}

diff --git a/examples/scrfuzz/scrfuzz.asm b/examples/scrfuzz/scrfuzz.asm
line changes: +14/-0
index 0000000..1c5c4cc
--- /dev/null
+++ b/examples/scrfuzz/scrfuzz.asm

@@ -0,0 +1,14 @@
+	name ScreenFuzz
+	public _screen_fuzz
+
+IOP	SEGMENT
+
+; increments screen memory in an infinite loop
+_screen_fuzz:
+	lpd ga, [pp].4  ; pp.4 is the input buffer address
+	movi ix, 0
+loop:	inc [ga+ix+]
+	andi ix, 07ffh
+	jmp loop
+
+IOP	ENDS

diff --git a/omf.pas b/omf.pas
line changes: +9/-4
index c301d82..66dcfad
--- a/omf.pas
+++ b/omf.pas

@@ -46,8 +46,9 @@ procedure RecAddRelocatableSegment(var rec: OmfRecord; const alignment: SegmentA
                                    const len: Word; const name: Word; const class: Word;
                                    const overlay: Word);
 procedure RecAddGroupDef(var rec: OmfRecord; const groupname: Word; const segment: Word);
-procedure RecAddPubDef(var rec: OmfRecord; const group: Word; const segment: Word;
-                       const name: string; const offset: Word; const typeidx: Byte);
+procedure RecAddPubDef(var rec: OmfRecord; const group: Word; const segment: Word);
+procedure RecAddPubDefName(var rec: OmfRecord; const name: string; const offset: Word;
+                           const typeidx: Byte);
 procedure RecAddEnumeratedData(var rec: OmfRecord; const segment: Word;
                                const offset: Word; const data: array of Byte;
                                const len: Word);

@@ -191,11 +192,15 @@ begin
     RecAddIndex(rec, segment);
 end;
 
-procedure RecAddPubDef(var rec: OmfRecord; const group: Word; const segment: Word;
-                       const name: string; const offset: Word; const typeidx: Byte);
+procedure RecAddPubDef(var rec: OmfRecord; const group: Word; const segment: Word);
 begin
     RecAddIndex(rec, group);
     RecAddIndex(rec, segment);
+end;
+
+procedure RecAddPubDefName(var rec: OmfRecord; const name: string; const offset: Word;
+                           const typeidx: Byte);
+begin
     RecAddName(rec, name);
     RecAddWord(rec, offset);
     RecAddByte(rec, typeidx);

diff --git a/opcode.pas b/opcode.pas
line changes: +10/-5
index 347bf86..c543d23
--- a/opcode.pas
+++ b/opcode.pas

@@ -22,7 +22,8 @@ Mnemonic = (
     mSINTR,
     mTSL,
     mWID,
-    mXFER
+    mXFER,
+    mInvalid
 );
 
 Directive = (

@@ -73,7 +74,7 @@ Operand = record
 end;
 
 InstructionRBP = (
-    rbpReg,    { RbP Encodes a register (rrr) }
+    rbpReg,    { RbP Encodes a register (RRR) }
     rbpBit,    { RbP Encodes a bit position (bbb) }
     rbpPtrReg, { RbP encodes a pointer register (PPP) }
     rbpNone,   { RbP encodes nothing (0b000) }

@@ -133,8 +134,9 @@ InstructionEncoding = record
     wb: EncWB;
     rbp: InstructionRBP;
 end;
+PInstructionEncoding = ^InstructionEncoding;
 
-const IEnc: array[0..79] of InstructionEncoding = (
+const IEnc: array[0..80] of InstructionEncoding = (
     (m: mADD;   o1: OpRegister; o2: OpMemory;    o3: OpAbsent;
         op: $A0;   w: widWord;     wb: wReserved;   rbp: rbpReg),

@@ -217,7 +219,7 @@ const IEnc: array[0..79] of InstructionEncoding = (
         op: $B4;   w: widByte;     wb: wLocation;   rbp: rbpNone),
 
     (m: mJMP;   o1: OpLocation; o2: OpAbsent;    o3: OpAbsent;
-        op: $20;   w: widWord;     wb: wLocation;   rbp: rbpJmp), { AUG shows w changing for 8/16-bit displacement }
+        op: $20;   w: widByte;     wb: wLocation;   rbp: rbpJmp), { AUG shows w changing for 8/16-bit displacement }
 
     (m: mJNBT;  o1: OpMemory;   o2: OpBit;       o3: OpLocation;
         op: $B8;   w: widByte;     wb: wLocation;   rbp: rbpBit),

@@ -376,7 +378,10 @@ const IEnc: array[0..79] of InstructionEncoding = (
         op: $00;   w: widByte;     wb: wReserved;   rbp: rbpWid),
 
     (m: mXFER;   o1: OpAbsent;   o2: OpAbsent;    o3: OpAbsent;
-        op: $00;   w: widByte;     wb: wReserved;   rbp: rbpXfer)
+        op: $00;   w: widByte;     wb: wReserved;   rbp: rbpXfer),
+
+    (m: mInvalid; o1: OpAbsent; o2: OpAbsent; o3: OpAbsent;
+        op: $FF;   w: widByte;     wb: wReserved;   rbp: rbpNone)
 );
 
 procedure GetMnemonicEncodingBounds(const m: Mnemonic; var i, j: Integer);

diff --git a/parse.pas b/parse.pas
line changes: +32/-18
index 92f08e1..80f5bb1
--- a/parse.pas
+++ b/parse.pas

@@ -1,11 +1,10 @@
 unit Parse;
 interface
 
-uses Opcode;
+uses Opcode, Util, SymTable;
 
 function Trim(var s: string): String;
 procedure StripComments(var s: string);
-function ToUpCase(s: string): string;
 
 procedure ConsumeWhitespace(const s: string; var i: Byte);
 function ConsumeIdentifier(const s: String; var i: Byte): String;

@@ -15,6 +14,8 @@ procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);
 function ParseNumericExpression(const s: String; var i: Word): Boolean;
 function ParseMnemonic(const m: String): Mnemonic;
 function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
+function MnemonicStr(const m: Mnemonic): string;
+function ShortOperandStr(const ot: OperandType): string;
 
 procedure DieBadOperand(const a: String);

@@ -30,7 +31,18 @@ MnemonicTranslation: array[Mnemonic] of String = (
     'LJMCE', 'LJMCNE', 'LJMP', 'LJNBT', 'LJNZ', 'LJNZB', 'LJZ', 'LJZB',
     'LPD', 'LPDI', 'MOV', 'MOVB', 'MOVBI', 'MOVI', 'MOVP', 'NOP', 'NOT',
     'NOTB', 'OR', 'ORB', 'ORBI', 'ORI', 'SETB', 'SINTR', 'TSL', 'WID',
-    'XFER'
+    'XFER', 'Invalid'
+);
+ShortOperands: array[OperandType] of string = (
+        'unknown[ERROR]',
+        'REG',
+        'PTRREG',
+        'IMMED',
+        'LOC',
+        'MEM',
+        'BIT',
+        'WIDTH',
+        ''
 );
 
 function IsWhitespace(c: Char): Boolean;

@@ -79,14 +91,6 @@ begin
         s[0] := Char(i - 1);
 end;
 
-function ToUpCase(s: string): string;
-var i: Byte;
-begin
-    for i := 1 to Length(s) do
-        s[i] := UpCase(s[i]);
-    ToUpCase := s;
-end;
-
 procedure ConsumeWhitespace(const s: string; var i: Byte);
 begin
     while (i <= Length(s)) AND IsWhitespace(s[i]) do

@@ -169,8 +173,8 @@ begin
             ParseMnemonic := i;
             Exit;
         end;
-    Writeln('Invalid mnemonic: ', m);
-    Halt(1);
+    { This mnemonic is invalid }
+    ParseMnemonic := mInvalid;
 end;
 
 procedure ParseRegister(a: String; var o: Operand);

@@ -225,11 +229,11 @@ begin
     i := 1;
     if IsAlpha(a[1]) OR (a[1] = '_') then begin
         w := ToUpCase(ConsumeIdentifier(a, i));
-        if FindSymbol(w, j) then
+        if FindSymbol(currentObj^.symbols, w, j) then
             o.immed := j
         else begin
             Writeln('Unknown symbol: ', a);
-            DumpSymbols;
+            { DumpSymbols(currentObj^.symbols); }
             Halt(1);
         end;
     end

@@ -249,7 +253,7 @@ begin
         Writeln('Malformed location? `', a, '`');
         Halt(1);
     end;
-    if FindSymbol(l, o.loc) then begin
+    if FindSymbol(currentObj^.symbols, l, o.loc) then begin
         o.Kind := OpLocation;
         Exit;
     end;

@@ -281,10 +285,10 @@ begin
     i := i + 2;
 
     ConsumeWhitespace(a, i);
-    if a[i] = ']' then begin
+    if (i <= Length(a)) AND (a[i] = ']') then begin
         Inc(i);
         ConsumeWhitespace(a, i);
-        if a[i] = '.' then begin
+        if (i <= Length(a)) AND (a[i] = '.') then begin
             Inc(i);
             ConsumeWhitespace(a, i);
             o.offset := ParseIntLiteral(Copy(a, i, Length(a) - i + 1));

@@ -351,6 +355,16 @@ begin
     ParseOperand := o.Kind <> OpUnknown;
 end;
 
+function MnemonicStr(const m: Mnemonic): string;
+begin
+    MnemonicStr := MnemonicTranslation[m];
+end;
+
+function ShortOperandStr(const ot: OperandType): string;
+begin
+    ShortOperandStr := ShortOperands[ot];
+end;
+
 procedure DieBadOperand(const a: String);
 begin
     Writeln('Invalid operand: ', a);

diff --git a/util.pas b/util.pas
line changes: +0/-92
index 8372f72..0000000
--- a/util.pas
+++ /dev/null

@@ -1,92 +0,0 @@
-unit Util;
-interface
-
-uses Opcode;
-
-function ChangeExtension(const instr: string; const newext: string): string;
-
-procedure DumpOperand(const o: Operand);
-
-implementation
-
-function ChangeExtension(const instr: string; const newext: string): string;
-var i, j: Integer;
-    outstr: string;
-begin
-    outstr := instr;
-    i := Length(outstr);
-    while i >= 0 do begin
-        if outstr[i] = Char($2E) then break;
-        i := i - 1;
-    end;
-    if i = -1 then begin
-        i := Length(outstr) + 1;
-        outstr[i] := '.';
-    end;
-    for j := 0 to Length(newext) - 1 do
-        outstr[i + j + 1] := newext[j + 1];
-    outstr[0] := Char(i + j + 1);
-
-    ChangeExtension := outstr;
-end;
-
-procedure DumpReg(const r: EncRRR);
-begin
-    Write('reg ');
-    case r of
-        rGA: Write('GA');
-        rGB: Write('GB');
-        rGC: Write('GC');
-        rBC: Write('BC');
-        rTP: Write('TP');
-        rIX: Write('IX');
-        rCC: Write('CC');
-        rMC: Write('MC');
-    end;
-end;
-
-procedure DumpPtrReg(const r: EncPPP);
-begin
-    Write('ptrreg ');
-    case r of
-        pGA: Write('GA');
-        pGB: Write('GB');
-        pGC: Write('GC');
-        pReserved: Write('Reserved');
-        pTP: Write('TP');
-    end;
-end;
-
-procedure DumpMemBase(const m: EncMM);
-begin
-    Write('mem ');
-    case m of 
-        mGA: Write('GA');
-        mGB: Write('GB');
-        mGC: Write('GC');
-        mPP: Write('PP');
-    end;
-end;
-
-procedure DumpOperand(const o: Operand);
-begin
-    case o.Kind of
-        OpUnknown: Write('!Unknown!');
-        OpRegister: DumpReg(o.reg);
-        OpPointer: DumpPtrReg(o.preg);
-        OpImmediate: Write('immed ', o.immed);
-        OpLocation: Write('loc ', o.loc);
-        OpMemory: begin
-            DumpMemBase(o.mm);
-            case o.aa of
-                BaseOffset: Write(' + ', o.offset);
-                BaseIndex: Write(' + IX');
-                BaseIndexIncrement: Write(' + IX+');
-            end;
-        end;
-        OpBit: Write('bit ', Byte(o.bit));
-        OpAbsent: Write('absent');
-    end;
-end;
-
-end.

diff --git a/util/symtable.pas b/util/symtable.pas
line changes: +98/-0
index 0000000..c3ee14a
--- /dev/null
+++ b/util/symtable.pas

@@ -0,0 +1,98 @@
+unit SymTable;
+
+interface
+
+type
+    PSymbolEntry = ^SymbolEntry;
+    SymbolEntry = record
+        name: string[32];
+        value: Word;
+        next: PSymbolEntry;
+    end;
+    SymbolTable = record
+        head: PSymbolEntry;
+        entries: Word;
+    end;
+
+procedure DefineSymbol(var table: SymbolTable; const n: String; const value: Word);
+function FindSymbolTableEntry(const table: SymbolTable; const n: string): PSymbolEntry;
+function FindSymbol(const table: SymbolTable; const n: String; var value: Word): Boolean;
+procedure DumpSymbols(const table: SymbolTable);
+
+implementation
+
+uses Util;
+
+function GetTableEnd(const table: SymbolTable): PSymbolEntry;
+var p: PSymbolEntry;
+begin
+    p := table.head;
+    while p <> nil do begin
+        if p^.next = nil then begin
+            GetTableEnd := p;
+            Exit;
+        end;
+        p := p^.next;
+    end;
+end;
+
+procedure DefineSymbol(var table: SymbolTable; const n: String; const value: Word);
+var p: PSymbolEntry;
+    new_entry: PSymbolEntry;
+begin
+    if FindSymbolTableEntry(table, n) <> nil then begin
+        Writeln('Symbol ''', n, ''' already defined');
+        Halt(1);
+    end;
+    if table.head = nil then begin
+        New(table.head);
+        new_entry := table.head;
+    end
+    else begin
+        p := GetTableEnd(table);
+        New(new_entry);
+        p^.next := new_entry;
+    end;
+    new_entry^.name := ToUpCase(n);
+    new_entry^.value := value;
+    new_entry^.next := nil;
+    Inc(table.entries);
+end;
+
+function FindSymbolTableEntry(const table: SymbolTable; const n: string): PSymbolEntry;
+var p: PSymbolEntry;
+begin
+    p := table.head;
+    while p <> nil do begin
+        if p^.name = n then begin
+            FindSymbolTableEntry := p;
+            Exit;
+        end;
+        p := p^.next;
+    end;
+    FindSymbolTableEntry := nil;
+end;
+
+function FindSymbol(const table: SymbolTable; const n: String; var value: Word): Boolean;
+var p: PSymbolEntry;
+begin
+    p := FindSymbolTableEntry(table, n);
+    if p <> nil then begin
+        FindSymbol := true;
+        value := p^.value;
+    end
+    else
+        FindSymbol := false;
+end;
+
+procedure DumpSymbols(const table: SymbolTable);
+var p: PSymbolEntry;
+begin
+    p := table.head;
+    while p <> nil do begin
+        Writeln(p^.name, ': ', p^.value);
+        p := p^.next;
+    end;
+end;
+
+end. 
\ No newline at end of file

diff --git a/util/util.pas b/util/util.pas
line changes: +101/-0
index 0000000..fd5fd99
--- /dev/null
+++ b/util/util.pas

@@ -0,0 +1,101 @@
+unit Util;
+interface
+
+uses Opcode;
+
+function ToUpCase(s: string): string;
+function ChangeExtension(const instr: string; const newext: string): string;
+
+procedure DumpOperand(const o: Operand);
+
+implementation
+
+function ToUpCase(s: string): string;
+var i: Byte;
+begin
+    for i := 1 to Length(s) do
+        s[i] := UpCase(s[i]);
+    ToUpCase := s;
+end;
+
+function ChangeExtension(const instr: string; const newext: string): string;
+var i, j: Integer;
+    outstr: string;
+begin
+    outstr := instr;
+    i := Length(outstr);
+    while i >= 0 do begin
+        if outstr[i] = Char($2E) then break;
+        i := i - 1;
+    end;
+    if i = -1 then begin
+        i := Length(outstr) + 1;
+        outstr[i] := '.';
+    end;
+    for j := 0 to Length(newext) - 1 do
+        outstr[i + j + 1] := newext[j + 1];
+    outstr[0] := Char(i + j + 1);
+
+    ChangeExtension := outstr;
+end;
+
+procedure DumpReg(const r: EncRRR);
+begin
+    Write('reg ');
+    case r of
+        rGA: Write('GA');
+        rGB: Write('GB');
+        rGC: Write('GC');
+        rBC: Write('BC');
+        rTP: Write('TP');
+        rIX: Write('IX');
+        rCC: Write('CC');
+        rMC: Write('MC');
+    end;
+end;
+
+procedure DumpPtrReg(const r: EncPPP);
+begin
+    Write('ptrreg ');
+    case r of
+        pGA: Write('GA');
+        pGB: Write('GB');
+        pGC: Write('GC');
+        pReserved: Write('Reserved');
+        pTP: Write('TP');
+    end;
+end;
+
+procedure DumpMemBase(const m: EncMM);
+begin
+    Write('mem ');
+    case m of 
+        mGA: Write('GA');
+        mGB: Write('GB');
+        mGC: Write('GC');
+        mPP: Write('PP');
+    end;
+end;
+
+procedure DumpOperand(const o: Operand);
+begin
+    case o.Kind of
+        OpUnknown: Write('!Unknown!');
+        OpRegister: DumpReg(o.reg);
+        OpPointer: DumpPtrReg(o.preg);
+        OpImmediate: Write('immed ', o.immed);
+        OpLocation: Write('loc ', o.loc);
+        OpMemory: begin
+            DumpMemBase(o.mm);
+            case o.aa of
+                BaseOffset: Write(' + ', o.offset);
+                BaseIndex: Write(' + IX');
+                BaseIndexIncrement: Write(' + IX+');
+            end;
+        end;
+        OpBit: Write('bit ', Byte(o.bit));
+        OpAbsent: Write('absent');
+    end;
+end;
+
+end.