git.bytex64.net -- asm89

commit:	593fbb89f6ab7d41df9eee7f1c849aa2eb3e4a91
author:	Chip
committer:	Chip
date:	Fri Mar 28 23:55:33 2025 -0500
parents:

Progress from last year

diff --git a/Makefile b/Makefile
line changes: +12/-0
index 0000000..fcb8c95
--- /dev/null
+++ b/Makefile

@@ -0,0 +1,12 @@
+emu2 = $(HOME)/DOS/emu2.sh
+tpc = $(HOME)/DOS/TP/BIN/TPC.EXE
+
+all: asm89.exe
+
+asm89.exe: asm89.pas omf.tpu opcode.tpu util.tpu parse.tpu compiler.tpu
+
+%.tpu: %.pas
+	$(emu2) $(tpc) $<
+
+%.exe: %.pas
+	$(emu2) $(tpc) $<

diff --git a/Makefile.fp b/Makefile.fp
line changes: +6/-0
index 0000000..30dc404
--- /dev/null
+++ b/Makefile.fp

@@ -0,0 +1,6 @@
+fpc = fpc -Cr -g
+
+all: asm89
+
+asm89: asm89.pas omf.pas opcode.pas compiler.pas parse.pas util.pas
+	$(fpc) $<

diff --git a/asm89.pas b/asm89.pas
line changes: +89/-0
index 0000000..174d18f
--- /dev/null
+++ b/asm89.pas

@@ -0,0 +1,89 @@
+program Asm89;
+uses Compiler, Omf, Util;
+
+const VERSION: string = '0.1';
+
+var infile: string;
+    outfile: string;
+    cob: CompiledObject;
+
+procedure WriteObjectFile(path: string; obj: CompiledObject);
+var rec: OmfRecord;
+    f: OmfFile;
+    i: Integer;
+    v: Word;
+begin
+    OpenOmf(path, f);
+
+    NewRecord(rec, THEADR);
+    if obj.name = '' then
+        RecAddName(rec, path)
+    else
+        RecAddName(rec, obj.name);
+    WriteRecord(f, rec);
+
+    NewRecord(rec, COMENT);
+    RecAddCommentStr(rec, 0, 0, 'asm89 v' + VERSION);
+    WriteRecord(f, rec);
+
+    NewRecord(rec, LNAMES);
+    RecAddByte(rec, 0); { The first name is empty, apparently used for
+                          the overlay index. }
+    RecAddName(rec, 'DATA');
+    RecAddName(rec, 'DGROUP');
+    RecAddName(rec, 'IOP');
+    WriteRecord(f, rec);
+
+    NewRecord(rec, SEGDEF);
+    { alignment, combine, segment length 5, name index, class index, overlay index }
+    RecAddRelocatableSegment(rec, AlignParagraph, CombinePublic, 5, 4, 2, 1);
+    WriteRecord(f, rec);
+
+    NewRecord(rec, GRPDEF);
+    { group name index, segment definition }
+    RecAddGroupDef(rec, 3, 1);
+    WriteRecord(f, rec);
+
+    NewRecord(rec, LEDATA);
+    RecAddEnumeratedData(rec, 1, 0, obj.buffer.data^, obj.buffer.data_len);
+    WriteRecord(f, rec);
+
+    NewRecord(rec, PUBDEF);
+    for i := 0 to 1023 do begin
+        if Length(obj.public_symbols[i]) = 0 then
+            Break;
+        if NOT FindSymbol(obj.public_symbols[i], v) then begin
+            Writeln('Public symbol `', obj.public_symbols[i], '` not defined');
+            Halt(1);
+        end;
+        { group index, segment index, name, offset, type }
+        RecAddPubDef(rec, 1, 1, obj.public_symbols[i], v, 0);
+    end;
+    WriteRecord(f, rec);
+
+    NewRecord(rec, MODEND);
+    RecAddByte(rec, 0);
+    WriteRecord(f, rec);
+
+    CloseOmf(f);
+end;
+
+begin
+    Writeln('asm89 compiler v', VERSION);
+    Writeln('');
+    if ParamCount < 1 then begin
+        Writeln('You must specify an input file');
+        Halt(1);
+    end;
+    infile := ParamStr(1);
+    if ParamCount < 2 then begin
+        outfile := ChangeExtension(ParamStr(1), 'obj');
+    end
+    else
+        outfile := ParamStr(2);
+
+    Writeln('Assembling ''', infile, ''' to ''', outfile, '''');
+
+    Compile(infile, cob);
+    WriteObjectFile(outfile, cob);
+end.

diff --git a/compiler.pas b/compiler.pas
line changes: +355/-0
index 0000000..6dc06ad
--- /dev/null
+++ b/compiler.pas

@@ -0,0 +1,355 @@
+unit Compiler;
+interface
+
+uses Opcode;
+
+const COMPILE_BUFFER_SIZE: Word = 65535;
+
+type
+SegArray = array[0..32767] of Byte;
+PSegArray = ^SegArray;
+Symbol = record
+    name: string[32];
+    value: Word;
+end;
+CompileBuffer = object
+    data: PSegArray;
+    data_len: Word;
+    constructor Init;
+    destructor Done;
+    procedure Add(const bytes: Pointer; const len: Word);
+    procedure Write(var f: File);
+end;
+CompiledObject = record
+    buffer: CompileBuffer;
+    name: string;
+    symbols: array[0..1023] of Symbol;
+    public_symbols: array[0..255] of String;
+end;
+
+function FindSymbol(const n: String; var value: Word): Boolean;
+procedure DumpSymbols;
+procedure Compile(const filename: string; var obj: CompiledObject);
+
+implementation
+
+uses Parse, Util;
+
+type Parts = object
+    alabel: string[32];
+    mnemonic: string[6];
+    op1: string[32];
+    op2: string[32];
+    op3: string[32];
+    procedure Clear;
+    procedure Dump;
+end;
+
+var currentObj: ^CompiledObject;
+
+procedure Parts.Clear;
+begin
+    alabel := '';
+    mnemonic := '';
+    op1 := '';
+    op2 := '';
+    op3 := '';
+end;
+
+procedure Parts.Dump;
+begin
+    Writeln('=== label: ', alabel, '; ',
+            'mnemonic: ', mnemonic, '; ',
+            'op1: ', op1, '; ',
+            'op2: ', op2, '; ',
+            'op3: ', op3, ';');
+end;
+
+procedure DumpSymbols;
+var i: Integer;
+begin
+    for i := 0 to 1023 do begin
+        if currentObj^.symbols[i].name <> '' then
+            Writeln(currentObj^.symbols[i].name, ': ', currentObj^.symbols[i].value);
+    end;
+end;
+
+constructor CompileBuffer.Init;
+begin
+    New(data);
+end;
+
+destructor CompileBuffer.Done;
+begin
+    Dispose(data);
+end;
+
+procedure CompileBuffer.Add(const bytes: Pointer; const len: Word);
+var i: Integer;
+    b: ^Byte;
+begin
+    b := bytes;
+    for i := 0 to len - 1 do begin
+        data^[data_len + i] := b^;
+        Inc(b);
+    end;
+    data_len := data_len + len;
+end;
+
+procedure CompileBuffer.Write(var f: File);
+begin
+    BlockWrite(f, data, data_len);
+end;
+
+procedure PickParts(const s: String; var pts: Parts);
+var i, j: Byte;
+    a, b: String[32];
+begin
+    i := 1;
+    ConsumeWhitespace(s, i);
+    a := ConsumeIdentifier(s, i);
+    if s[i] = ':' then begin { this is a label }
+        pts.alabel := ToUpCase(a);
+
+        Inc(i);
+        ConsumeWhitespace(s, i);
+        if i > Length(s) then
+            Exit;
+
+        pts.mnemonic := ToUpCase(ConsumeIdentifier(s, i));
+
+        ConsumeWhitespace(s, i);
+        pts.op1 := ConsumeUntilComma(s, i);
+        Trim(pts.op1);
+    end
+    else begin
+        ConsumeWhitespace(s, i);
+        j := i; { save position }
+        b := ConsumeIdentifier(s, i);
+        if ToUpCase(b) = 'EQU' then begin
+            pts.mnemonic := 'EQU';
+            pts.op1 := ToUpCase(a);
+            ConsumeWhitespace(s, i);
+            pts.op2 := Copy(s, i, Length(s) - i + 1);
+            Exit;
+        end;
+        i := j; { reparse from earlier position }
+        b := ConsumeUntilComma(s, i);
+        pts.mnemonic := ToUpCase(a);
+        pts.op1 := Trim(b);
+    end;
+
+    ConsumeWhitespaceAndCommas(s, i);
+    if i > Length(s) then
+        Exit;
+    pts.op2 := ConsumeUntilComma(s, i);
+    Trim(pts.op2);
+    ConsumeWhitespaceAndCommas(s, i);
+    if i > Length(s) then
+        Exit;
+    pts.op3 := ConsumeUntilComma(s, i);
+    Trim(pts.op3);
+end;
+
+procedure DefineSymbol(const n: String; const value: Word);
+var i: Integer;
+begin
+    Writeln('Define symbol ', ToUpCase(n), ': ', value);
+    for i := 0 to 1023 do begin
+        if currentObj^.symbols[i].name = '' then begin
+            currentObj^.symbols[i].name := ToUpCase(n);
+            currentObj^.symbols[i].value := value;
+            Exit;
+        end;
+    end;
+    Writeln('Too many symbols (> 1024): ', n);
+end;
+
+function FindSymbol(const n: String; var value: Word): Boolean;
+var i: Integer;
+begin
+    for i := 0 to 1023 do begin
+        if currentObj^.symbols[i].name = n then begin
+            FindSymbol := true;
+            value := currentObj^.symbols[i].value;
+            Exit;
+        end;
+    end;
+    FindSymbol := false;
+end;
+
+procedure AddPublic(p: String);
+var i: Byte;
+    j: Word;
+    n: String[32];
+begin
+    for j := 0 to 1023 do begin
+        if Length(currentObj^.public_symbols[j]) = 0 then
+            Break;
+    end;
+    if j = 1024 then begin
+        Writeln('Too many public symbols');
+        Halt(1);
+    end;
+    i := 1;
+    ConsumeWhitespace(p, i);
+    ConsumeIdentifier(p, i); { should be our PUBLIC declaration }
+    ConsumeWhitespace(p, i);
+    repeat
+        n := ConsumeUntilComma(p, i);
+        if Length(n) > 0 then begin
+            currentObj^.public_symbols[j] := ToUpCase(Trim(n));
+            Inc(j);
+            Inc(i);
+            ConsumeWhitespace(p, i);
+        end;
+    until Length(n) = 0;
+end;
+
+procedure CompileInstruction(const pts: Parts);
+var i, j: Integer;
+    instr: array[0..5] of Byte;
+    instr_len: Byte;
+    m: Mnemonic;
+    op: array[1..3] of Operand;
+    ie: ^InstructionEncoding;
+    rbp: Byte;
+    ienc_found: Boolean;
+begin
+    instr_len := 2;
+    m := ParseMnemonic(pts.mnemonic);
+    GetMnemonicEncodingBounds(m, i, j);
+    if i = High(IEnc) then begin
+        Writeln('Instruction not supported: ', m);
+        Halt(2);
+    end;
+    { iterate over possible encodings }
+    ienc_found := false;
+    for i := i to j do begin
+        op[1].Kind := OpAbsent;
+        op[2].Kind := OpAbsent;
+        op[3].Kind := OpAbsent;
+        ie := @IEnc[i];
+        Writeln('  ie ', i, ': ', m, ' ', ie^.o1, ' ', ie^.o2, ' ', ie^.o3);
+        if (ie^.o1 <> OpAbsent) and not ParseOperand(pts.op1, op[1], ie^.o1) then
+            Continue;
+        if (ie^.o2 <> OpAbsent) and not ParseOperand(pts.op2, op[2], ie^.o2) then
+            Continue;
+        if (ie^.o3 <> OpAbsent) and not ParseOperand(pts.op3, op[3], ie^.o3) then
+            Continue;
+        ienc_found := true;
+        Break;
+    end;
+    if NOT ienc_found then begin
+        Writeln('Invalid operands for ', m);
+        Writeln('Wanted ', ie^.o1, ' ', ie^.o2, ' ', ie^.o3);
+        Writeln('Got ', op[1].Kind, ' ', op[2].Kind, ' ', op[3].Kind);
+        Halt(1);
+    end;
+    instr[1] := Byte(ie^.op);
+    instr[0] := (Byte(ie^.wb) SHL 3) OR Byte(ie^.w);
+    for i := 1 to 3 do begin
+        case op[i].Kind of
+            OpMemory: begin
+                if (m = mMOV) AND (i = 2) then begin
+                    Writeln('mem-mem MOV not yet implemented');
+                    Halt(2);
+                end else begin
+                    instr[1] := instr[1] OR Byte(op[i].mm);
+                    instr[0] := instr[0] OR (Byte(op[i].aa) SHL 1);
+                    if op[i].aa = BaseOffset then begin
+                        instr[2] := Lo(op[i].offset);
+                        instr_len := 3;
+                    end;
+                    if m <> mMOV then
+                        Break;
+                end;
+            end;
+            OpRegister:
+                rbp := Byte(op[i].reg);
+            OpPointer:
+                rbp := Byte(op[i].preg);
+            OpImmediate: begin
+                case ie^.wb of
+                    wOne: begin
+                        instr[instr_len - 1] := Lo(op[i].immed);
+                        instr_len := instr_len + 1;
+                    end;
+                    wTwo: begin
+                        instr[instr_len - 1] := Lo(op[i].immed);
+                        instr[instr_len] := Hi(op[i].immed);
+                        instr_len := instr_len + 2;
+                    end;
+                end;
+            end;
+            OpLocation: begin
+                instr[instr_len - 1] := Lo(op[i].loc);
+                if op[i].loc <= 255 then
+                    instr_len := instr_len + 1
+                else begin
+                    instr[instr_len] := Hi(op[i].loc);
+                    instr_len := instr_len + 2;
+                end;
+            end;
+            OpBit:
+                rbp := Byte(op[i].bit);
+        end;
+    end;
+    case ie^.rbp of
+        rbpReg, rbpBit, rbpPtrReg: instr[0] := instr[0] OR (rbp SHL 5);
+        { rbpNone does nothing }
+        rbpHlt: instr[0] := instr[0] OR $20;
+        rbpSintr: instr[0] := instr[0] OR $40;
+        rbpJmp: instr[0] := instr[0] OR $80;
+    end;
+    currentObj^.buffer.Add(@instr, instr_len);
+    Write('mnemonic: ', m, '; op1: ');
+    DumpOperand(op[1]);
+    Write('; op2: ');
+    DumpOperand(op[2]);
+    Writeln('');
+end;
+
+procedure Compile(const filename: string; var obj: CompiledObject);
+var f: Text;
+    linebuf: string;
+    pts: Parts;
+    op: Operand;
+begin
+    currentObj := @obj;  { poor man's `this` pointer }
+    obj.buffer.Init;
+    Assign(f, filename);
+    Reset(f);
+    while not Eof(f) do begin
+        Readln(f, linebuf);
+        Writeln('*** ', linebuf);
+        StripComments(linebuf);
+        if Length(linebuf) = 0 then { nothing left on the line }
+            Continue;
+        pts.Clear;
+        PickParts(linebuf, pts);
+        pts.Dump();
+
+        if pts.alabel <> '' then
+            DefineSymbol(pts.alabel, obj.buffer.data_len);
+
+        if pts.mnemonic = '' then
+            Continue
+        else if pts.mnemonic = 'NAME' then begin
+            obj.name := pts.op1;
+        end
+        else if pts.mnemonic = 'PUBLIC' then begin
+            AddPublic(linebuf);
+        end
+        else if pts.mnemonic = 'EQU' then begin
+            if not ParseOperand(pts.op2, op, OpImmediate) then
+                DieBadOperand(pts.op2);
+            DefineSymbol(pts.op1, op.immed);
+        end
+        else
+            CompileInstruction(pts);
+    end;
+    DumpSymbols;
+end;
+
+end.

diff --git a/examples/scrfuzz.asm b/examples/scrfuzz.asm
line changes: +11/-0
index 0000000..acc7d08
--- /dev/null
+++ b/examples/scrfuzz.asm

@@ -0,0 +1,11 @@
+	name ScreenFuzz
+	public _ScreenFuzz
+
+; increments screen memory in an infinite loop
+_ScreenFuzz:
+	lpd ga, [pp].4  ; pp.4 is the input buffer address
+	movi ix, 0
+1:	inc [ga+ix+]
+	andi ix, 07ffh
+	jmp 1-
+

diff --git a/omf.pas b/omf.pas
line changes: +248/-0
index 0000000..90ca13f
--- /dev/null
+++ b/omf.pas

@@ -0,0 +1,248 @@
+unit OMF;
+interface
+
+type
+OmfFile = record
+    f: file;
+end;
+OmfRecordHeader = record
+    rectype: Byte;
+    length: Word;
+end;
+OmfRecord = record
+    head: OmfRecordHeader;
+    data: array [0..1023] of Byte;
+    data_len: Integer;
+end;
+SegmentAlignment = (AlignAbsolute, AlignByte, AlignWord, AlignParagraph,
+                    AlignPage, AlignDoubleWord);
+SegmentCombination = (CombinePrivate, __reserved1, CombinePublic);
+
+const THEADR: Byte = $80;
+const COMENT: Byte = $88;
+const MODEND: Byte = $8A;
+const LNAMES: Byte = $96;
+const SEGDEF: Byte = $98;
+const GRPDEF: Byte = $9A;
+const PUBDEF: Byte = $90;
+const LINNUM: Byte = $94;
+const LEDATA: Byte = $A0;
+
+procedure OpenOmf(const name: string; var omf: OmfFile);
+procedure CloseOmf(var omf: OmfFile);
+
+procedure NewRecord(var rec: OmfRecord; const rectype: Byte);
+
+procedure RecAddByte(var rec: OmfRecord; const b: Byte);
+procedure RecAddWord(var rec: OmfRecord; const w: Word);
+procedure RecAddIndex(var rec: OmfRecord; const i: Word);
+procedure RecAddData(var rec: OmfRecord; const data: array of Byte; const len: Word);
+
+procedure RecAddName(var rec: OmfRecord; const name: string);
+procedure RecAddCommentStr(var rec: OmfRecord; const ctype: Byte;
+                           const cclass: Byte; const comment: string);
+procedure RecAddRelocatableSegment(var rec: OmfRecord; const alignment: SegmentAlignment;
+                                   const combination: SegmentCombination;
+                                   const len: Word; const name: Word; const class: Word;
+                                   const overlay: Word);
+procedure RecAddGroupDef(var rec: OmfRecord; const groupname: Word; const segment: Word);
+procedure RecAddPubDef(var rec: OmfRecord; const group: Word; const segment: Word;
+                       const name: string; const offset: Word; const typeidx: Byte);
+procedure RecAddEnumeratedData(var rec: OmfRecord; const segment: Word;
+                               const offset: Word; const data: array of Byte;
+                               const len: Word);
+procedure WriteRecord(var omf: OmfFile; var rec: OmfRecord);
+
+implementation
+
+procedure NewRecord(var rec: OmfRecord; const rectype: byte);
+begin
+    rec.head.rectype := rectype;
+    rec.head.length := 0;
+    rec.data_len := 0;
+end;
+
+procedure RecAddByte(var rec: OmfRecord; const b: Byte);
+begin
+    if rec.head.length + 1 >= 1024 then begin
+        Writeln('adding byte exceeds record size');
+        Halt(1);
+    end;
+
+    rec.data[rec.head.length] := b;
+    Inc(rec.head.length);
+end;
+
+procedure RecAddWord(var rec: OmfRecord; const w: Word);
+begin
+    if rec.head.length + 2 >= 1024 then begin
+        Writeln('adding word exceeds record size');
+        Halt(1);
+    end;
+
+    rec.data[rec.head.length] := Lo(w);
+    Inc(rec.head.length);
+    rec.data[rec.head.length] := Hi(w);
+    Inc(rec.head.length);
+end;
+
+procedure RecAddIndex(var rec: OmfRecord; const i: Word);
+begin
+    if i > $7FFF then begin
+        Writeln('cannot encode index: ', i);
+        Halt(1);
+    end;
+
+    if i > $7F then begin
+        if rec.head.length + 2 >= 1024 then begin
+            Writeln('Not enough space in record to encode index: ', i);
+            Halt(1);
+        end;
+        rec.data[rec.head.length] := $80 OR Byte(i shr 8);
+        Inc(rec.head.length);
+        rec.data[rec.head.length] := Byte(i);
+        Inc(rec.head.length);
+    end
+    else
+    begin
+        if rec.head.length + 1 >= 1024 then begin
+            Writeln('Not enough space in record to encode index: ', i);
+            Halt(1);
+        end;
+        rec.data[rec.head.length] := i;
+        Inc(rec.head.length);
+    end;
+end;
+
+procedure RecAddData(var rec: OmfRecord; const data: array of Byte; const len: Word);
+var i: Integer;
+begin
+    if len + rec.head.length > 1024 then begin
+        Writeln('adding data exceeds record size');
+        Halt(1);
+    end;
+    for i := 0 to len - 1 do begin
+        rec.data[rec.head.length] := data[i];
+        Inc(rec.head.length);
+    end;
+end;
+
+procedure RecAddName(var rec: Omfrecord; const name: string);
+var name_len: Integer;
+    i: Integer;
+begin
+    name_len := Length(name);
+    if name_len > 255 then begin
+        Writeln('Name must be less than 255 characters');
+        Halt(1);
+    end;
+    if rec.head.length + name_len >= 1024 then begin
+        Writeln('adding name exceeds record size');
+        Halt(1);
+    end;
+    { We already have a Pascal string, so just write it }
+    for i := 0 to name_len do begin
+        rec.data[rec.head.length] := Byte(name[i]);
+        Inc(rec.head.length);
+    end;
+end;
+
+procedure RecAddCommentStr(var rec: OmfRecord; const ctype: Byte;
+                           const cclass: Byte; const comment: string);
+var comment_len: Integer;
+    i: Integer;
+begin
+    comment_len := Length(comment);
+    if rec.head.length + 2 + comment_len + 1 >= 1024 then begin
+        Writeln('Not enough space in record to add comment');
+        Halt(1);
+    end;
+
+    rec.data[rec.head.length] := ctype;
+    Inc(rec.head.length);
+    rec.data[rec.head.length] := cclass;
+    Inc(rec.head.length);
+    for i := 0 to comment_len do begin
+        rec.data[rec.head.length] := Byte(comment[i]);
+        Inc(rec.head.length);
+    end;
+end;
+
+procedure RecAddRelocatableSegment(var rec: OmfRecord; const alignment: SegmentAlignment;
+                                   const combination: SegmentCombination;
+                                   const len: Word; const name: Word; const class: Word;
+                                   const overlay: Word);
+var attributes: Byte;
+begin
+    attributes := (Byte(alignment) SHL 5) OR (Byte(combination) SHL 2);
+    rec.data[rec.head.length] := attributes;
+    Inc(rec.head.length);
+
+    RecAddWord(rec, len);
+    RecAddIndex(rec, name);
+    RecAddIndex(rec, class);
+    RecAddIndex(rec, overlay);
+end;
+
+procedure RecAddGroupDef(var rec: OmfRecord; const groupname: Word; const segment: Word);
+begin
+    RecAddIndex(rec, groupname);
+    RecAddByte(rec, $FF);
+    RecAddIndex(rec, segment);
+end;
+
+procedure RecAddPubDef(var rec: OmfRecord; const group: Word; const segment: Word;
+                       const name: string; const offset: Word; const typeidx: Byte);
+begin
+    RecAddIndex(rec, group);
+    RecAddIndex(rec, segment);
+    RecAddName(rec, name);
+    RecAddWord(rec, offset);
+    RecAddByte(rec, typeidx);
+end;
+
+procedure RecAddEnumeratedData(var rec: OmfRecord; const segment: Word;
+                               const offset: Word; const data: array of Byte;
+                               const len: Word);
+begin
+    RecAddIndex(rec, segment);
+    RecAddWord(rec, offset);
+    RecAddData(rec, data, len);
+end;
+
+procedure WriteRecord(var omf: OmfFile; var rec: OmfRecord);
+var checksum: Byte;
+    ptr: ^Byte;
+    i: Integer;
+begin
+    { Look, the checksum is unnecessary, but I went through all this trouble
+      because I thought LINK.EXE needed it, so it's staying. }
+    checksum := rec.head.rectype + Hi(rec.head.length) + Lo(rec.head.length);
+    for i := 0 to rec.head.length - 1 do
+        checksum := Byte(Word(checksum) + rec.data[i]);
+    checksum := 255 - checksum;
+    { Length includes the checksum byte }
+    rec.data[rec.head.length] := checksum;
+    Inc(rec.head.length);
+
+    { for some reason BlockWrite cannot access record fields directly in TP }
+    ptr := @rec.head.rectype;
+    BlockWrite(omf.f, ptr^, 1);
+    ptr := @rec.head.length;
+    BlockWrite(omf.f, ptr^, 2);
+    ptr := @rec.data;
+    BlockWrite(omf.f, ptr^, rec.head.length);
+end;
+
+procedure OpenOmf(const name: String; var omf: OmfFile);
+begin
+    Assign(omf.f, name);
+    Rewrite(omf.f, 1);
+end;
+
+procedure CloseOmf(var omf: OmfFile);
+begin
+    Close(omf.f);
+end;
+
+end.

diff --git a/opcode.pas b/opcode.pas
line changes: +158/-0
index 0000000..4778963
--- /dev/null
+++ b/opcode.pas

@@ -0,0 +1,158 @@
+unit Opcode;
+interface
+
+type
+Mnemonic = (
+    mADD, mADDB, mADDBI, mADDI,
+    mAND, mANDB, mANDBI, mANDI,
+    mCALL,
+    mCLR,
+    mDEC, mDECB,
+    mHLT,
+    mINC, mINCB,
+    mJBT, mJMCE, mJMCNE, mJMP, mJNBT, mJNZ, mJNZB, mJZ, mJZB,
+    mLCALL,
+    mLJBT, mLJMCE, mLJMCNE, mLJMP, mLJNBT, mLJNZ, mLJNZB, mLJZ, mLJZB,
+    mLPD, mLPDI,
+    mMOV, mMOVB, mMOVBI, mMOVI, mMOVP,
+    mNOP,
+    mNOT, mNOTB,
+    mOR, mORB, mORBI, mORI,
+    mSETB,
+    mSINTR,
+    mTSL,
+    mWID,
+    mXFER
+);
+
+EncRRR = (rGA, rGB, rGC, rBC, rTP, rIX, rCC, rMC);
+EncBBB = (Bit0, Bit1, Bit2, Bit3, Bit4, Bit5, Bit6, Bit7);
+EncPPP = (pGA, pGB, pGC, pReserved, pTP);
+EncWB = (
+    wReserved, { Instruction has no immediate/displacement data }
+    wOne,      { One byte of immediate/displacement data }
+    wTwo,      { Two bytes of immediate/displacement data }
+    wTSL,      { Special case for TSL: one byte immediate and one byte displacement }
+    wLocation  { One or two bytes displacement dependent on the distance to the target }
+);
+EncAA = (Base, BaseOffset, BaseIndex, BaseIndexIncrement);
+EncMM = (mGA, mGB, mGC, mPP);
+
+OperandType = (OpUnknown, OpRegister, OpPointer, OpImmediate, OpLocation,
+               OpMemory, OpBit, OpAbsent);
+OperandSet = set of OperandType;
+Operand = record
+    case Kind: OperandType of
+        OpUnknown: ();
+        OpRegister: (reg: EncRRR);
+        OpPointer: (preg: EncPPP);
+        OpImmediate: (immed: Word);
+        OpLocation: (loc: Word);
+        OpMemory: (
+            aa: EncAA;
+            mm: EncMM;
+            offset: Word;
+        );
+        OpBit: (bit: EncBBB);
+end;
+
+InstructionRBP = (
+    rbpReg,    { RbP Encodes a register }
+    rbpBit,    { RbP Encodes a bit position }
+    rbpPtrReg, { RbP encodes a pointer register }
+    rbpNone,   { RbP encodes nothing (0b000) }
+    rbpHlt,    { RbP encodes 0b001 }
+    rbpSintr,  { RbP encodes 0b010 }
+    rbpJmp     { RbP encodes 0b100 }
+);
+InstructionWidth = (widByte, widWord);
+InstructionEncoding = record
+    { these parts are matched against }
+    m: Mnemonic;
+    o1: OperandType;
+    o2: OperandType;
+    o3: OperandType;
+    { these parts are encoded when the above fields match }
+    op: Byte;
+    w: InstructionWidth;
+    wb: EncWB;
+    rbp: InstructionRBP;
+end;
+
+const IEnc: array[0..18] of InstructionEncoding = (
+    (m: mADD;   o1: OpRegister; o2: OpMemory;    o3: OpAbsent;
+        op: $A0;   w: widWord;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mADD;   o1: OpMemory;   o2: OpRegister;  o3: OpAbsent;
+        op: $D0;   w: widWord;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mADDB;  o1: OpRegister; o2: OpMemory;    o3: OpAbsent;
+        op: $A0;   w: widWord;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mADDB;  o1: OpMemory;   o2: OpRegister;  o3: OpAbsent;
+        op: $D0;   w: widWord;     wb: wReserved;   rbp: rbpReg), {error in width?}
+
+    (m: mADDBI; o1: OpRegister; o2: OpImmediate; o3: OpAbsent;
+        op: $20;   w: widByte;     wb: wOne;        rbp: rbpReg),
+
+    (m: mADDBI; o1: OpMemory;   o2: OpImmediate; o3: OpAbsent;
+        op: $C0;   w: widByte;     wb: wOne;        rbp: rbpNone),
+
+    (m: mADDI;  o1: OpRegister; o2: OpImmediate; o3: OpAbsent;
+        op: $20;   w: widWord;     wb: wTwo;        rbp: rbpReg),
+
+    (m: mADDI;  o1: OpMemory;   o2: OpImmediate; o3: OpAbsent;
+        op: $C0;   w: widWord;     wb: wTwo;        rbp: rbpNone),
+
+    (m: mAND;   o1: OpRegister; o2: OpMemory;    o3: OpAbsent;
+        op: $A8;   w: widWord;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mAND;   o1: OpMemory;   o2: OpRegister;  o3: OpAbsent;
+        op: $D8;   w: widWord;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mANDB;  o1: OpRegister; o2: OpMemory;    o3: OpAbsent;
+        op: $A8;   w: widByte;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mANDB;  o1: OpMemory;   o2: OpRegister;  o3: OpAbsent;
+        op: $D8;   w: widWord;     wb: wReserved;   rbp: rbpReg),
+
+    (m: mANDBI; o1: OpRegister; o2: OpImmediate; o3: OpAbsent;
+        op: $28;   w: widByte;     wb: wOne;        rbp: rbpReg),
+
+    (m: mANDBI; o1: OpMemory;   o2: OpImmediate; o3: OpAbsent;
+        op: $C8;   w: widByte;     wb: wOne;        rbp: rbpNone),
+
+    (m: mCALL;  o1: OpMemory;   o2: OpLocation;  o3: OpAbsent;
+        op: $9C;   w: widWord;     wb: wLocation;   rbp: rbpJmp),
+
+    (m: mHLT;   o1: OpAbsent;   o2: OpAbsent;    o3: OpAbsent;
+        op: $48;   w: widByte;     wb: wReserved;   rbp: rbpHlt),
+
+    (m: mJBT;   o1: OpMemory;   o2: OpBit;       o3: OpLocation;
+        op: $BC;   w: widByte;     wb: wLocation;   rbp: rbpBit),
+
+    (m: mJMP;   o1: OpLocation; o2: OpAbsent;    o3: OpAbsent;
+        op: $20;   w: widWord;     wb: wTwo;        rbp: rbpJmp),
+
+    (m: mSINTR; o1: OpAbsent;   o2: OpAbsent;    o3: OpAbsent;
+        op: $00;   w: widByte;     wb: wReserved;   rbp: rbpSintr)
+);
+
+procedure GetMnemonicEncodingBounds(const m: Mnemonic; var i, j: Integer);
+
+implementation
+
+procedure GetMnemonicEncodingBounds(const m: Mnemonic; var i, j: Integer);
+var x: Integer;
+begin
+    for x := Low(IEnc) to High(IEnc) do
+        if IEnc[x].m = m then
+            Break;
+    i := x;
+    for x := i + 1 to High(IEnc) do
+        If IEnc[x].m <> m then
+            Break;
+    j := x - 1;
+end;
+
+end.

diff --git a/parse.pas b/parse.pas
line changes: +344/-0
index 0000000..799a792
--- /dev/null
+++ b/parse.pas

@@ -0,0 +1,344 @@
+unit Parse;
+interface
+
+uses Opcode;
+
+function Trim(var s: string): String;
+procedure StripComments(var s: string);
+function ToUpCase(s: string): string;
+
+procedure ConsumeWhitespace(const s: string; var i: Byte);
+function ConsumeIdentifier(const s: String; var i: Byte): String;
+function ConsumeUntilComma(const s: string; var i: Byte): String;
+procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);
+
+function ParseMnemonic(const m: String): Mnemonic;
+function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
+
+procedure DieBadOperand(const a: String);
+
+implementation
+
+uses Compiler;
+
+const
+MnemonicTranslation: array[Mnemonic] of String = (
+    'ADD', 'ADDB', 'ADDBI', 'ADDI', 'AND', 'ANDB', 'ANDBI', 'ANDI',
+    'CALL', 'CLR', 'DEC', 'DECB', 'HLT', 'INC', 'INCB', 'JBT', 'JMCE',
+    'JMCNE', 'JMP', 'JNBT', 'JNZ', 'JNZB', 'JZ', 'JZB', 'LCALL', 'LJBT',
+    'LJMCE', 'LJMCNE', 'LJMP', 'LJNBT', 'LJNZ', 'LJNZB', 'LJZ', 'LJZB',
+    'LPD', 'LPDI', 'MOV', 'MOVB', 'MOVBI', 'MOVI', 'MOVP', 'NOP', 'NOT',
+    'NOTB', 'OR', 'ORB', 'ORBI', 'ORI', 'SETB', 'SINTR', 'TSL', 'WID',
+    'XFER'
+);
+
+function IsWhitespace(c: Char): Boolean;
+begin
+    IsWhitespace := (c = ' ') OR (c = #9);
+end;
+
+function IsAlpha(c: Char): Boolean;
+begin
+    IsAlpha := ((c >= #$41) AND (c <= #$5A)) OR ((c >= #$61) AND (c <= #$7A));
+end;
+
+function IsNumeric(c: Char): Boolean;
+begin
+    IsNumeric := ((c >= #$30) AND (c <= #$39));
+end;
+
+function IsHexadecimal(c: Char): Boolean;
+begin
+    IsHexadecimal := ((c >= '0') AND (c <= '9')) OR ((c >= 'a') OR (c <= 'f'))
+                  OR ((c >= 'A') AND (c <= 'F'));
+end;
+
+function Trim(var s: string): String;
+var i, j: Byte;
+begin
+    for i := 1 to Length(s) do
+        if NOT IsWhitespace(s[i]) then
+            Break;
+    if i = Length(s) then begin
+        Trim := '';
+        Exit;
+    end;
+    for j := Length(s) downto 1 do
+        if NOT IsWhitespace(s[j]) then
+            Break;
+    s := Copy(s, i, j - i + 1);
+    Trim := s;
+end;
+
+procedure StripComments(var s: string);
+var i: Byte;
+begin
+    i := Pos(';', s);
+    if i <> 0 then
+        s[0] := Char(i - 1);
+end;
+
+function ToUpCase(s: string): string;
+var i: Byte;
+begin
+    for i := 1 to Length(s) do
+        s[i] := UpCase(s[i]);
+    ToUpCase := s;
+end;
+
+procedure ConsumeWhitespace(const s: string; var i: Byte);
+begin
+    while (i <= Length(s)) AND IsWhitespace(s[i]) do
+        Inc(i);
+end;
+
+function ConsumeIdentifier(const s: String; var i: Byte): String;
+var j: Byte;
+begin
+    j := i;
+    while (i <= Length(s)) AND (IsAlpha(s[i]) OR IsNumeric(s[i]) OR (s[i] = '_')) do
+        Inc(i);
+    ConsumeIdentifier := Copy(s, j, i - j);
+end;
+
+function ConsumeRegister(const s: String; var i: Byte): String;
+begin
+    ConsumeRegister := Copy(s, i, 2);
+    i := i + 2;
+end;
+
+function ConsumeUntilComma(const s: string; var i: Byte): String;
+var j: Byte;
+begin
+    j := i;
+    while (i <= Length(s)) AND (s[i] <> ',') do
+        Inc(i);
+    ConsumeUntilComma := Copy(s, j, i - j);
+end;
+
+procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);
+begin
+    while (i <= Length(s)) AND (IsWhitespace(s[i]) OR (s[i] = ',')) do
+        Inc(i);
+end;
+
+function ParseIntLiteral(const s: String): Word;
+var i, j: Integer;
+    hex: Boolean;
+    ss: String;
+begin
+    hex := False;
+    for i := 1 to Length(s) do begin
+        if (s[i] = 'h') OR (s[i] = 'H') then begin
+            if i < Length(s) then begin
+                Writeln('Trailing garbage in hex literal: ', s);
+                Halt(1);
+            end;
+            hex := True;
+        end
+        else if NOT (IsHexadecimal(s[i]) OR (s[i] = '-') OR (s[i] = '.')) then begin
+            Writeln('Invalid literal: ', s);
+            Halt(1);
+        end;
+    end;
+    if hex then
+        ss := '$' + Copy(s, 0, i - 1)
+    else
+        ss := Copy(s, 1, i);
+    Val(ss, j, i);
+    if i <> 0 then begin
+        Writeln('Invalid literal: ', s);
+        Halt(1);
+    end;
+    ParseIntLiteral := Word(j);
+end;
+
+function ParseMnemonic(const m: String): Mnemonic;
+var i: Mnemonic;
+begin
+    for i := Low(Mnemonic) to High(Mnemonic) do
+        if m = MnemonicTranslation[i] then begin
+            ParseMnemonic := i;
+            Exit;
+        end;
+    Writeln('Invalid mnemonic: ', m);
+    Halt(1);
+end;
+
+procedure ParseRegister(a: String; var o: Operand);
+begin
+    a := ToUpCase(a);
+    if a = 'BC' then
+        o.reg := rBC
+    else if a = 'CC' then
+        o.reg := rCC
+    else if a = 'GA' then
+        o.reg := rGA
+    else if a = 'GB' then
+        o.reg := rGB
+    else if a = 'GC' then
+        o.reg := rGC
+    else if a = 'IX' then
+        o.reg := rIX
+    else if a = 'MC' then
+        o.reg := rMC
+    else if a = 'TP' then
+        o.reg := rTP
+    else begin
+        o.Kind := OpUnknown;
+        Exit;
+    end;
+    o.Kind := OpRegister;
+end;
+
+procedure ParsePointer(a: String; var o: Operand);
+begin
+    a := ToUpCase(a);
+    if a = 'GA' then
+        o.preg := pGA
+    else if a = 'GB' then
+        o.preg := pGB
+    else if a = 'GC' then
+        o.preg := pGC
+    else if a = 'TP' then
+        o.preg := pTP
+    else begin
+        o.Kind := OpUnknown;
+        Exit;
+    end;
+    o.Kind := OpPointer;
+end;
+
+procedure ParseImmediate(a: String; var o: Operand);
+var i: Byte;
+    j: Word;
+    w: String[32];
+begin
+    i := 1;
+    if IsAlpha(a[1]) OR (a[1] = '_') then begin
+        w := ToUpCase(ConsumeIdentifier(a, i));
+        if FindSymbol(w, j) then
+            o.immed := j
+        else begin
+            Writeln('Unknown symbol: ', a);
+            DumpSymbols;
+            Halt(1);
+        end;
+    end
+    else
+        o.immed := ParseIntLiteral(a);
+    o.Kind := OpImmediate;
+end;
+
+procedure ParseLocation(a: String; var o: Operand);
+var i: Byte;
+    l: String;
+begin
+    i := 1;
+    l := ToUpCase(ConsumeIdentifier(a, i));
+    if i < Length(a) + 1 then begin
+        Writeln('Malformed location? `', a, '`');
+        Halt(1);
+    end;
+    if NOT FindSymbol(l, o.loc) then begin
+        Writeln('Symbol not found: ', l);
+        Halt(1);
+    end;
+    o.Kind := OpLocation;
+end;
+
+procedure ParseMemory(a: String; var o: Operand);
+var i: Byte;
+    reg: String[2];
+begin
+    if a[1] <> '[' then
+        Exit;
+
+    i := 2;
+    ConsumeWhitespace(a, i);
+    reg := ToUpCase(Copy(a, i, 2));
+    if reg = 'GA' then
+        o.mm := mGA
+    else if reg = 'GB' then
+        o.mm := mGB
+    else if reg = 'GC' then
+        o.mm := mGC
+    else if reg = 'PP' then
+        o.mm := mPP
+    else
+        Exit;
+    i := i + 2;
+
+    ConsumeWhitespace(a, i);
+    if a[i] = ']' then begin
+        Inc(i);
+        ConsumeWhitespace(a, i);
+        if a[i] = '.' then begin
+            Inc(i);
+            ConsumeWhitespace(a, i);
+            o.offset := ParseIntLiteral(Copy(a, i, Length(a) - i + 1));
+            if o.offset > 255 then begin
+                Write('Memory offset > 255: ');
+                Exit;
+            end;
+            o.aa := BaseOffset;
+        end
+        else
+            o.aa := Base
+    end
+    else if a[i] = '+' then begin
+        Inc(i);
+        ConsumeWhitespace(a, i);
+        reg := ConsumeRegister(a, i);
+        if reg <> 'IX' then
+            Exit;
+        ConsumeWhitespace(a, i);
+        if a[i] = '+' then
+            o.aa := BaseIndexIncrement
+        else if a[i] = ']' then
+            o.aa := BaseIndex
+        else
+            Exit;
+    end;
+    o.Kind := OpMemory;
+end;
+
+procedure ParseBit(a: String; var o: Operand);
+var i: Byte;
+    code: Integer;
+begin
+    Val(a, i, code);
+    if (code = 0) AND (i < 8) then begin
+        o.Kind := OpBit;
+        o.bit := EncBBB(i);
+    end;
+end;
+
+function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
+begin
+    o.Kind := OpUnknown;
+    case ot of
+        OpRegister: ParseRegister(a, o);
+        OpPointer: ParsePointer(a, o);
+        OpImmediate: ParseImmediate(a, o);
+        OpLocation: ParseLocation(a, o);
+        OpMemory: ParseMemory(a, o);
+        OpBit: ParseBit(a, o);
+    end;
+    ParseOperand := o.Kind <> OpUnknown;
+end;
+
+procedure DieBadOperand(const a: String);
+begin
+    Writeln('Invalid operand: ', a);
+    {
+    Write('  possible operands: ');
+    for i := Low(OperandType) to High(OperandType) do
+        if i in possibleOperands then
+            Write(i, ' ');
+    Writeln('');
+    }
+    Halt(1);
+end;
+
+end.

diff --git a/util.pas b/util.pas
line changes: +92/-0
index 0000000..8372f72
--- /dev/null
+++ b/util.pas

@@ -0,0 +1,92 @@
+unit Util;
+interface
+
+uses Opcode;
+
+function ChangeExtension(const instr: string; const newext: string): string;
+
+procedure DumpOperand(const o: Operand);
+
+implementation
+
+function ChangeExtension(const instr: string; const newext: string): string;
+var i, j: Integer;
+    outstr: string;
+begin
+    outstr := instr;
+    i := Length(outstr);
+    while i >= 0 do begin
+        if outstr[i] = Char($2E) then break;
+        i := i - 1;
+    end;
+    if i = -1 then begin
+        i := Length(outstr) + 1;
+        outstr[i] := '.';
+    end;
+    for j := 0 to Length(newext) - 1 do
+        outstr[i + j + 1] := newext[j + 1];
+    outstr[0] := Char(i + j + 1);
+
+    ChangeExtension := outstr;
+end;
+
+procedure DumpReg(const r: EncRRR);
+begin
+    Write('reg ');
+    case r of
+        rGA: Write('GA');
+        rGB: Write('GB');
+        rGC: Write('GC');
+        rBC: Write('BC');
+        rTP: Write('TP');
+        rIX: Write('IX');
+        rCC: Write('CC');
+        rMC: Write('MC');
+    end;
+end;
+
+procedure DumpPtrReg(const r: EncPPP);
+begin
+    Write('ptrreg ');
+    case r of
+        pGA: Write('GA');
+        pGB: Write('GB');
+        pGC: Write('GC');
+        pReserved: Write('Reserved');
+        pTP: Write('TP');
+    end;
+end;
+
+procedure DumpMemBase(const m: EncMM);
+begin
+    Write('mem ');
+    case m of 
+        mGA: Write('GA');
+        mGB: Write('GB');
+        mGC: Write('GC');
+        mPP: Write('PP');
+    end;
+end;
+
+procedure DumpOperand(const o: Operand);
+begin
+    case o.Kind of
+        OpUnknown: Write('!Unknown!');
+        OpRegister: DumpReg(o.reg);
+        OpPointer: DumpPtrReg(o.preg);
+        OpImmediate: Write('immed ', o.immed);
+        OpLocation: Write('loc ', o.loc);
+        OpMemory: begin
+            DumpMemBase(o.mm);
+            case o.aa of
+                BaseOffset: Write(' + ', o.offset);
+                BaseIndex: Write(' + IX');
+                BaseIndexIncrement: Write(' + IX+');
+            end;
+        end;
+        OpBit: Write('bit ', Byte(o.bit));
+        OpAbsent: Write('absent');
+    end;
+end;
+
+end.