commit:2474b045f1a957ba2de45e7c314bb3e4e16eb85c
author:Chip
committer:Chip
date:Mon Mar 31 01:54:27 2025 -0500
parents:6810fa62f2b511c6040e207b6fa145c7779f8654
Improve parser and add data directives
diff --git a/compiler.pas b/compiler.pas
line changes: +207/-70
index 6dc06ad..b060c50
--- a/compiler.pas
+++ b/compiler.pas
@@ -18,6 +18,9 @@ CompileBuffer = object
     constructor Init;
     destructor Done;
     procedure Add(const bytes: Pointer; const len: Word);
+    procedure AddByte(const b: Byte);
+    procedure AddWord(const w: Word);
+    procedure Increment(const len: Word);
     procedure Write(var f: File);
 end;
 CompiledObject = record
@@ -35,34 +38,52 @@ implementation
 
 uses Parse, Util;
 
-type Parts = object
-    alabel: string[32];
-    mnemonic: string[6];
-    op1: string[32];
-    op2: string[32];
-    op3: string[32];
-    procedure Clear;
-    procedure Dump;
+type PartsKind = (partsNone, partsInstruction, partsDirective);
+Parts = record
+    label_or_name: string[32];
+    mnemonic: string[7];
+    case Kind: PartsKind of
+        partsInstruction: (
+            op1: string[32];
+            op2: string[32];
+            op3: string[32];
+        );
+        partsDirective: (
+            args: string[128];
+        )
 end;
+DataDefType = (DataDB, DataDW, DataDD);
 
 var currentObj: ^CompiledObject;
 
-procedure Parts.Clear;
+procedure PartsClear(var pts: Parts);
 begin
-    alabel := '';
-    mnemonic := '';
-    op1 := '';
-    op2 := '';
-    op3 := '';
+    pts.Kind := partsNone;
+    pts.label_or_name := '';
+    pts.mnemonic := '';
+    pts.op1 := '';
+    pts.op2 := '';
+    pts.op3 := '';
+    pts.args := '';
+
 end;
 
-procedure Parts.Dump;
+procedure PartsDump(const pts: Parts);
 begin
-    Writeln('=== label: ', alabel, '; ',
-            'mnemonic: ', mnemonic, '; ',
-            'op1: ', op1, '; ',
-            'op2: ', op2, '; ',
-            'op3: ', op3, ';');
+    if pts.kind = partsNone then
+        Writeln('=== NONE')
+    else if pts.kind = partsInstruction then begin
+        Writeln('=== Instruction label: ', pts.label_or_name, '; ',
+                'mnemonic: ', pts.mnemonic, '; ',
+                'op1: ', pts.op1, '; ',
+                'op2: ', pts.op2, '; ',
+                'op3: ', pts.op3, ';');
+    end
+    else if pts.kind = partsDirective then begin
+        Writeln('=== Directive label:', pts.label_or_name, '; ',
+                'directive: ', pts.mnemonic, '; ',
+                'args: ', pts.args, '; ');
+    end;
 end;
 
 procedure DumpSymbols;
@@ -96,6 +117,25 @@ begin
     data_len := data_len + len;
 end;
 
+procedure CompileBuffer.AddByte(const b: Byte);
+begin
+    data^[data_len] := b;
+    Inc(data_len);
+end;
+
+procedure CompileBuffer.AddWord(const w: Word);
+begin
+    data^[data_len] := Lo(w);
+    Inc(data_len);
+    data^[data_len] := Hi(w);
+    Inc(data_len);
+end;
+
+procedure CompileBuffer.Increment(const len: Word);
+begin
+    data_len := data_len + len;
+end;
+
 procedure CompileBuffer.Write(var f: File);
 begin
     BlockWrite(f, data, data_len);
@@ -107,38 +147,58 @@ var i, j: Byte;
 begin
     i := 1;
     ConsumeWhitespace(s, i);
-    a := ConsumeIdentifier(s, i);
+    a := ToUpCase(ConsumeIdentifier(s, i));
     if s[i] = ':' then begin { this is a label }
-        pts.alabel := ToUpCase(a);
-
+        pts.label_or_name := a;
         Inc(i);
         ConsumeWhitespace(s, i);
         if i > Length(s) then
             Exit;
 
-        pts.mnemonic := ToUpCase(ConsumeIdentifier(s, i));
-
-        ConsumeWhitespace(s, i);
-        pts.op1 := ConsumeUntilComma(s, i);
-        Trim(pts.op1);
+        a := ToUpCase(ConsumeIdentifier(s, i));
+    end;
+    ConsumeWhitespace(s, i);
+    j := i;  { save position }
+    b := ToUpCase(ConsumeIdentifier(s, j));
+    { EQU uniquely and annoyingly has a name before and an arg after }
+    if b = 'EQU' then begin
+        pts.Kind := partsDirective;
+        pts.label_or_name := a;
+        pts.mnemonic := b;
+        ConsumeWhitespace(s, j);
+        pts.args := Copy(s, j, Length(s) - j + 1);
+        Exit;
     end
-    else begin
+    { Things which have a name but no other args }
+    else if (b = 'SEGMENT') or (b = 'STRUC') or (b = 'ENDS') then begin
+        pts.Kind := partsDirective;
+        pts.label_or_name := a;
+        pts.mnemonic := b;
+        Exit;
+    end
+    { Then everything that is just a directive and arguments }
+    else if (a = 'DB') or (a = 'DW') or (a = 'DD') or (a = 'DS') or
+            (a = 'ORG') or (a = 'NAME') or (a = 'PUBLIC') or
+            (a = 'EXTRN') then begin
+        pts.Kind := partsDirective;
+        pts.mnemonic := a;
         ConsumeWhitespace(s, i);
-        j := i; { save position }
-        b := ConsumeIdentifier(s, i);
-        if ToUpCase(b) = 'EQU' then begin
-            pts.mnemonic := 'EQU';
-            pts.op1 := ToUpCase(a);
-            ConsumeWhitespace(s, i);
-            pts.op2 := Copy(s, i, Length(s) - i + 1);
-            Exit;
-        end;
-        i := j; { reparse from earlier position }
-        b := ConsumeUntilComma(s, i);
-        pts.mnemonic := ToUpCase(a);
-        pts.op1 := Trim(b);
+        pts.args := Copy(s, i, Length(s) - i + 1);
+        Exit;
+    end
+    { And finally, standalone directives }
+    else if (a = 'EVEN') or (a = 'END') then begin
+        pts.Kind := partsDirective;
+        pts.mnemonic := a;
+        Exit;
     end;
 
+    { Everything left must be an instruction }
+    pts.Kind := partsInstruction;
+    pts.mnemonic := a;
+    ConsumeWhitespace(s, i);
+    pts.op1 := ConsumeUntilComma(s, i);
+    Trim(pts.op1);
     ConsumeWhitespaceAndCommas(s, i);
     if i > Length(s) then
         Exit;
@@ -178,7 +238,7 @@ begin
     FindSymbol := false;
 end;
 
-procedure AddPublic(p: String);
+procedure AddPublic(const s: String);
 var i: Byte;
     j: Word;
     n: String[32];
@@ -192,20 +252,70 @@ begin
         Halt(1);
     end;
     i := 1;
-    ConsumeWhitespace(p, i);
-    ConsumeIdentifier(p, i); { should be our PUBLIC declaration }
-    ConsumeWhitespace(p, i);
     repeat
-        n := ConsumeUntilComma(p, i);
+        n := ConsumeUntilComma(s, i);
         if Length(n) > 0 then begin
             currentObj^.public_symbols[j] := ToUpCase(Trim(n));
             Inc(j);
             Inc(i);
-            ConsumeWhitespace(p, i);
+            ConsumeWhitespace(s, i);
         end;
     until Length(n) = 0;
 end;
 
+procedure DefineData(const s: String; width: DataDefType);
+var i, j: Byte;
+    n: Word;
+    def: String[32];
+begin
+    i := 1;
+    repeat
+        def := ConsumeUntilComma(s, i);
+        if Length(def) > 0 then begin
+            Trim(def);
+            if def[1] = '''' then
+                case width of
+                    dataDB: begin
+                        for j := 2 to Length(def) do begin
+                            if def[j] = '''' then
+                                Break;
+                            currentObj^.buffer.AddByte(Byte(def[j]));
+                        end;
+                        if j < Length(def) then
+                            DieBadOperand('Trailing garbage after terminal '' in string');
+                    end;
+                    DataDW: begin
+                        if Length(def) = 3 then
+                            n := Word(def[2])
+                        else if Length(def) = 4 then
+                            n := (Word(def[2]) shl 8) or Word(def[3])
+                        else
+                            DieBadOperand('Malformed string in DW definition');
+                        currentObj^.buffer.AddWord(n);
+                    end;
+                    DataDD: DieBadOperand('Cannot use strings in DD definition');
+                end
+            else begin
+                if not ParseNumericExpression(def, n) then
+                    DieBadOperand(def);
+                case width of
+                    DataDB: currentObj^.buffer.AddByte(Lo(n));
+                    DataDW: currentObj^.buffer.AddWord(n);
+                    DataDD: begin
+                        { Uhhh punting here as I don't really understand what
+                          values are allowed in a DD. This only allows 16-bit
+                          values and pads them to 32-bit. }
+                        currentObj^.buffer.AddWord(n);
+                        currentObj^.buffer.AddWord(0);
+                    end;
+                end;
+            end;
+            Inc(i); { skip comma }
+            ConsumeWhitespace(s, i);
+        end;
+    until Length(def) = 0;
+end;
+
 procedure CompileInstruction(const pts: Parts);
 var i, j: Integer;
     instr: array[0..5] of Byte;
@@ -310,11 +420,47 @@ begin
     Writeln('');
 end;
 
+procedure CompileDirective(const pts: Parts);
+var op: Operand;
+begin
+    if pts.mnemonic = 'EQU' then begin
+        if not ParseOperand(pts.args, op, OpImmediate) then
+            DieBadOperand(pts.args);
+        DefineSymbol(pts.label_or_name, op.immed);
+    end
+    else if pts.mnemonic = 'DB' then begin
+        DefineData(pts.args, DataDB);
+    end
+    else if pts.mnemonic = 'DW' then begin
+        DefineData(pts.args, DataDW);
+    end
+    else if pts.mnemonic = 'DD' then
+    begin
+        DefineData(pts.args, DataDD);
+    end
+    else if pts.mnemonic = 'DS' then begin
+        if not ParseOperand(pts.args, op, OpImmediate) then
+            DieBadOperand(pts.args);
+        currentObj^.buffer.Increment(op.immed);
+    end
+    else if pts.mnemonic = 'NAME' then begin
+        currentObj^.name := pts.op1;
+    end
+    else if pts.mnemonic = 'PUBLIC' then begin
+        AddPublic(pts.args);
+    end
+    else if pts.mnemonic = 'SEGMENT' then begin
+        Writeln('Segment ', pts.label_or_name, ' begins');
+    end
+    else if pts.mnemonic = 'ENDS' then begin
+        Writeln('Segment ', pts.label_or_name, ' ends');
+    end
+end;
+
 procedure Compile(const filename: string; var obj: CompiledObject);
 var f: Text;
     linebuf: string;
     pts: Parts;
-    op: Operand;
 begin
     currentObj := @obj;  { poor man's `this` pointer }
     obj.buffer.Init;
@@ -326,28 +472,19 @@ begin
         StripComments(linebuf);
         if Length(linebuf) = 0 then { nothing left on the line }
             Continue;
-        pts.Clear;
+        PartsClear(pts);
         PickParts(linebuf, pts);
-        pts.Dump();
-
-        if pts.alabel <> '' then
-            DefineSymbol(pts.alabel, obj.buffer.data_len);
-
-        if pts.mnemonic = '' then
-            Continue
-        else if pts.mnemonic = 'NAME' then begin
-            obj.name := pts.op1;
-        end
-        else if pts.mnemonic = 'PUBLIC' then begin
-            AddPublic(linebuf);
-        end
-        else if pts.mnemonic = 'EQU' then begin
-            if not ParseOperand(pts.op2, op, OpImmediate) then
-                DieBadOperand(pts.op2);
-            DefineSymbol(pts.op1, op.immed);
-        end
-        else
-            CompileInstruction(pts);
+        PartsDump(pts);
+
+        if (pts.label_or_name <> '') and
+            not ((pts.mnemonic = 'EQU') or (pts.mnemonic = 'STRUC') or
+                 (pts.mnemonic = 'SEGMENT') or (pts.mnemonic = 'ENDS')) then
+            DefineSymbol(pts.label_or_name, obj.buffer.data_len);
+
+        case pts.Kind of
+            partsDirective: CompileDirective(pts);
+            partsInstruction: CompileInstruction(pts);
+        end;
     end;
     DumpSymbols;
 end;

diff --git a/examples/data.asm b/examples/data.asm
line changes: +10/-0
index 0000000..1b5f6df
--- /dev/null
+++ b/examples/data.asm
@@ -0,0 +1,10 @@
+	name data
+	public _test_data
+
+data	SEGMENT
+_test_data:
+	db 1, 20h, 'hello'
+	dw AA55h, 'X', 'AB'
+	dd 42
+	ds 8
+data	ENDS

diff --git a/opcode.pas b/opcode.pas
line changes: +17/-3
index 541bbce..347bf86
--- a/opcode.pas
+++ b/opcode.pas
@@ -25,6 +25,20 @@ Mnemonic = (
     mXFER
 );
 
+Directive = (
+    dEQU,
+    dDB, dDW, dDD, dDS,
+    dSTRUC,
+    dORG,
+    dEVEN,
+    dNAME,
+    dSEGMENT,
+    dPUBLIC,
+    dEXTRN,
+    dENDS,
+    dEND
+);
+
 EncRRR = (rGA, rGB, rGC, rBC, rTP, rIX, rCC, rMC);
 EncBBB = (Bit0, Bit1, Bit2, Bit3, Bit4, Bit5, Bit6, Bit7);
 EncPPP = (pGA, pGB, pGC, pReserved, pTP);
@@ -37,6 +51,7 @@ EncWB = (
 );
 EncAA = (Base, BaseOffset, BaseIndex, BaseIndexIncrement);
 EncMM = (mGA, mGB, mGC, mPP);
+EncWidth = (widByte, widWord);
 
 OperandType = (OpUnknown, OpRegister, OpPointer, OpImmediate, OpLocation,
                OpMemory, OpBit, OpWidth, OpAbsent);
@@ -54,7 +69,7 @@ Operand = record
             offset: Word;
         );
         OpBit: (bit: EncBBB);
-        OpWidth: (width: Byte);
+        OpWidth: (width: EncWidth);
 end;
 
 InstructionRBP = (
@@ -68,7 +83,6 @@ InstructionRBP = (
     rbpWid,    { RbP encodes logical widths }
     rbpXfer    { RbP encodes 0b011 }
 );
-InstructionWidth = (widByte, widWord);
 { |        low order byte         |        high order byte        | }
 { | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | }
 { |   R/b/P   |  W B  |  A A  | W |        OPCODE         |  MM   | }
@@ -115,7 +129,7 @@ InstructionEncoding = record
     o3: OperandType;
     { these parts are encoded when the above fields match }
     op: Byte;  { pre-shifted into the high 6-bits }
-    w: InstructionWidth;
+    w: EncWidth;
     wb: EncWB;
     rbp: InstructionRBP;
 end;

diff --git a/parse.pas b/parse.pas
line changes: +13/-0
index 01b6673..92f08e1
--- a/parse.pas
+++ b/parse.pas
@@ -12,6 +12,7 @@ function ConsumeIdentifier(const s: String; var i: Byte): String;
 function ConsumeUntilComma(const s: string; var i: Byte): String;
 procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);
 
+function ParseNumericExpression(const s: String; var i: Word): Boolean;
 function ParseMnemonic(const m: String): Mnemonic;
 function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
 
@@ -324,6 +325,17 @@ begin
     end;
 end;
 
+procedure ParseWidth(const a: String; var o: Operand);
+begin
+    if a = '8' then begin
+        o.kind := OpWidth;
+        o.width := widByte;
+    end else if a = '16' then begin
+        o.kind := OpWidth;
+        o.width := widWord;
+    end;
+end;
+
 function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
 begin
     o.Kind := OpUnknown;
@@ -334,6 +346,7 @@ begin
         OpLocation: ParseLocation(a, o);
         OpMemory: ParseMemory(a, o);
         OpBit: ParseBit(a, o);
+        OpWidth: ParseWidth(a, o);
     end;
     ParseOperand := o.Kind <> OpUnknown;
 end;