Improve parser and add data directives
constructor Init;
destructor Done;
procedure Add(const bytes: Pointer; const len: Word);
+ procedure AddByte(const b: Byte);
+ procedure AddWord(const w: Word);
+ procedure Increment(const len: Word);
procedure Write(var f: File);
end;
CompiledObject = record
uses Parse, Util;
-type Parts = object
- alabel: string[32];
- mnemonic: string[6];
- op1: string[32];
- op2: string[32];
- op3: string[32];
- procedure Clear;
- procedure Dump;
+type PartsKind = (partsNone, partsInstruction, partsDirective);
+Parts = record
+ label_or_name: string[32];
+ mnemonic: string[7];
+ case Kind: PartsKind of
+ partsInstruction: (
+ op1: string[32];
+ op2: string[32];
+ op3: string[32];
+ );
+ partsDirective: (
+ args: string[128];
+ )
end;
+DataDefType = (DataDB, DataDW, DataDD);
var currentObj: ^CompiledObject;
-procedure Parts.Clear;
+procedure PartsClear(var pts: Parts);
begin
- alabel := '';
- mnemonic := '';
- op1 := '';
- op2 := '';
- op3 := '';
+ pts.Kind := partsNone;
+ pts.label_or_name := '';
+ pts.mnemonic := '';
+ pts.op1 := '';
+ pts.op2 := '';
+ pts.op3 := '';
+ pts.args := '';
+
end;
-procedure Parts.Dump;
+procedure PartsDump(const pts: Parts);
begin
- Writeln('=== label: ', alabel, '; ',
- 'mnemonic: ', mnemonic, '; ',
- 'op1: ', op1, '; ',
- 'op2: ', op2, '; ',
- 'op3: ', op3, ';');
+ if pts.kind = partsNone then
+ Writeln('=== NONE')
+ else if pts.kind = partsInstruction then begin
+ Writeln('=== Instruction label: ', pts.label_or_name, '; ',
+ 'mnemonic: ', pts.mnemonic, '; ',
+ 'op1: ', pts.op1, '; ',
+ 'op2: ', pts.op2, '; ',
+ 'op3: ', pts.op3, ';');
+ end
+ else if pts.kind = partsDirective then begin
+ Writeln('=== Directive label:', pts.label_or_name, '; ',
+ 'directive: ', pts.mnemonic, '; ',
+ 'args: ', pts.args, '; ');
+ end;
end;
procedure DumpSymbols;
data_len := data_len + len;
end;
+procedure CompileBuffer.AddByte(const b: Byte);
+begin
+ data^[data_len] := b;
+ Inc(data_len);
+end;
+
+procedure CompileBuffer.AddWord(const w: Word);
+begin
+ data^[data_len] := Lo(w);
+ Inc(data_len);
+ data^[data_len] := Hi(w);
+ Inc(data_len);
+end;
+
+procedure CompileBuffer.Increment(const len: Word);
+begin
+ data_len := data_len + len;
+end;
+
procedure CompileBuffer.Write(var f: File);
begin
BlockWrite(f, data, data_len);
begin
i := 1;
ConsumeWhitespace(s, i);
- a := ConsumeIdentifier(s, i);
+ a := ToUpCase(ConsumeIdentifier(s, i));
if s[i] = ':' then begin { this is a label }
- pts.alabel := ToUpCase(a);
-
+ pts.label_or_name := a;
Inc(i);
ConsumeWhitespace(s, i);
if i > Length(s) then
Exit;
- pts.mnemonic := ToUpCase(ConsumeIdentifier(s, i));
-
- ConsumeWhitespace(s, i);
- pts.op1 := ConsumeUntilComma(s, i);
- Trim(pts.op1);
+ a := ToUpCase(ConsumeIdentifier(s, i));
+ end;
+ ConsumeWhitespace(s, i);
+ j := i; { save position }
+ b := ToUpCase(ConsumeIdentifier(s, j));
+ { EQU uniquely and annoyingly has a name before and an arg after }
+ if b = 'EQU' then begin
+ pts.Kind := partsDirective;
+ pts.label_or_name := a;
+ pts.mnemonic := b;
+ ConsumeWhitespace(s, j);
+ pts.args := Copy(s, j, Length(s) - j + 1);
+ Exit;
end
- else begin
+ { Things which have a name but no other args }
+ else if (b = 'SEGMENT') or (b = 'STRUC') or (b = 'ENDS') then begin
+ pts.Kind := partsDirective;
+ pts.label_or_name := a;
+ pts.mnemonic := b;
+ Exit;
+ end
+ { Then everything that is just a directive and arguments }
+ else if (a = 'DB') or (a = 'DW') or (a = 'DD') or (a = 'DS') or
+ (a = 'ORG') or (a = 'NAME') or (a = 'PUBLIC') or
+ (a = 'EXTRN') then begin
+ pts.Kind := partsDirective;
+ pts.mnemonic := a;
ConsumeWhitespace(s, i);
- j := i; { save position }
- b := ConsumeIdentifier(s, i);
- if ToUpCase(b) = 'EQU' then begin
- pts.mnemonic := 'EQU';
- pts.op1 := ToUpCase(a);
- ConsumeWhitespace(s, i);
- pts.op2 := Copy(s, i, Length(s) - i + 1);
- Exit;
- end;
- i := j; { reparse from earlier position }
- b := ConsumeUntilComma(s, i);
- pts.mnemonic := ToUpCase(a);
- pts.op1 := Trim(b);
+ pts.args := Copy(s, i, Length(s) - i + 1);
+ Exit;
+ end
+ { And finally, standalone directives }
+ else if (a = 'EVEN') or (a = 'END') then begin
+ pts.Kind := partsDirective;
+ pts.mnemonic := a;
+ Exit;
end;
+ { Everything left must be an instruction }
+ pts.Kind := partsInstruction;
+ pts.mnemonic := a;
+ ConsumeWhitespace(s, i);
+ pts.op1 := ConsumeUntilComma(s, i);
+ Trim(pts.op1);
ConsumeWhitespaceAndCommas(s, i);
if i > Length(s) then
Exit;
FindSymbol := false;
end;
-procedure AddPublic(p: String);
+procedure AddPublic(const s: String);
var i: Byte;
j: Word;
n: String[32];
Halt(1);
end;
i := 1;
- ConsumeWhitespace(p, i);
- ConsumeIdentifier(p, i); { should be our PUBLIC declaration }
- ConsumeWhitespace(p, i);
repeat
- n := ConsumeUntilComma(p, i);
+ n := ConsumeUntilComma(s, i);
if Length(n) > 0 then begin
currentObj^.public_symbols[j] := ToUpCase(Trim(n));
Inc(j);
Inc(i);
- ConsumeWhitespace(p, i);
+ ConsumeWhitespace(s, i);
end;
until Length(n) = 0;
end;
+procedure DefineData(const s: String; width: DataDefType);
+var i, j: Byte;
+ n: Word;
+ def: String[32];
+begin
+ i := 1;
+ repeat
+ def := ConsumeUntilComma(s, i);
+ if Length(def) > 0 then begin
+ Trim(def);
+ if def[1] = '''' then
+ case width of
+ dataDB: begin
+ for j := 2 to Length(def) do begin
+ if def[j] = '''' then
+ Break;
+ currentObj^.buffer.AddByte(Byte(def[j]));
+ end;
+ if j < Length(def) then
+ DieBadOperand('Trailing garbage after terminal '' in string');
+ end;
+ DataDW: begin
+ if Length(def) = 3 then
+ n := Word(def[2])
+ else if Length(def) = 4 then
+ n := (Word(def[2]) shl 8) or Word(def[3])
+ else
+ DieBadOperand('Malformed string in DW definition');
+ currentObj^.buffer.AddWord(n);
+ end;
+ DataDD: DieBadOperand('Cannot use strings in DD definition');
+ end
+ else begin
+ if not ParseNumericExpression(def, n) then
+ DieBadOperand(def);
+ case width of
+ DataDB: currentObj^.buffer.AddByte(Lo(n));
+ DataDW: currentObj^.buffer.AddWord(n);
+ DataDD: begin
+ { Uhhh punting here as I don't really understand what
+ values are allowed in a DD. This only allows 16-bit
+ values and pads them to 32-bit. }
+ currentObj^.buffer.AddWord(n);
+ currentObj^.buffer.AddWord(0);
+ end;
+ end;
+ end;
+ Inc(i); { skip comma }
+ ConsumeWhitespace(s, i);
+ end;
+ until Length(def) = 0;
+end;
+
procedure CompileInstruction(const pts: Parts);
var i, j: Integer;
instr: array[0..5] of Byte;
Writeln('');
end;
+procedure CompileDirective(const pts: Parts);
+var op: Operand;
+begin
+ if pts.mnemonic = 'EQU' then begin
+ if not ParseOperand(pts.args, op, OpImmediate) then
+ DieBadOperand(pts.args);
+ DefineSymbol(pts.label_or_name, op.immed);
+ end
+ else if pts.mnemonic = 'DB' then begin
+ DefineData(pts.args, DataDB);
+ end
+ else if pts.mnemonic = 'DW' then begin
+ DefineData(pts.args, DataDW);
+ end
+ else if pts.mnemonic = 'DD' then
+ begin
+ DefineData(pts.args, DataDD);
+ end
+ else if pts.mnemonic = 'DS' then begin
+ if not ParseOperand(pts.args, op, OpImmediate) then
+ DieBadOperand(pts.args);
+ currentObj^.buffer.Increment(op.immed);
+ end
+ else if pts.mnemonic = 'NAME' then begin
+ currentObj^.name := pts.op1;
+ end
+ else if pts.mnemonic = 'PUBLIC' then begin
+ AddPublic(pts.args);
+ end
+ else if pts.mnemonic = 'SEGMENT' then begin
+ Writeln('Segment ', pts.label_or_name, ' begins');
+ end
+ else if pts.mnemonic = 'ENDS' then begin
+ Writeln('Segment ', pts.label_or_name, ' ends');
+ end
+end;
+
procedure Compile(const filename: string; var obj: CompiledObject);
var f: Text;
linebuf: string;
pts: Parts;
- op: Operand;
begin
currentObj := @obj; { poor man's `this` pointer }
obj.buffer.Init;
StripComments(linebuf);
if Length(linebuf) = 0 then { nothing left on the line }
Continue;
- pts.Clear;
+ PartsClear(pts);
PickParts(linebuf, pts);
- pts.Dump();
-
- if pts.alabel <> '' then
- DefineSymbol(pts.alabel, obj.buffer.data_len);
-
- if pts.mnemonic = '' then
- Continue
- else if pts.mnemonic = 'NAME' then begin
- obj.name := pts.op1;
- end
- else if pts.mnemonic = 'PUBLIC' then begin
- AddPublic(linebuf);
- end
- else if pts.mnemonic = 'EQU' then begin
- if not ParseOperand(pts.op2, op, OpImmediate) then
- DieBadOperand(pts.op2);
- DefineSymbol(pts.op1, op.immed);
- end
- else
- CompileInstruction(pts);
+ PartsDump(pts);
+
+ if (pts.label_or_name <> '') and
+ not ((pts.mnemonic = 'EQU') or (pts.mnemonic = 'STRUC') or
+ (pts.mnemonic = 'SEGMENT') or (pts.mnemonic = 'ENDS')) then
+ DefineSymbol(pts.label_or_name, obj.buffer.data_len);
+
+ case pts.Kind of
+ partsDirective: CompileDirective(pts);
+ partsInstruction: CompileInstruction(pts);
+ end;
end;
DumpSymbols;
end;
+ name data
+ public _test_data
+
+data SEGMENT
+_test_data:
+ db 1, 20h, 'hello'
+ dw AA55h, 'X', 'AB'
+ dd 42
+ ds 8
+data ENDS
mXFER
);
+Directive = (
+ dEQU,
+ dDB, dDW, dDD, dDS,
+ dSTRUC,
+ dORG,
+ dEVEN,
+ dNAME,
+ dSEGMENT,
+ dPUBLIC,
+ dEXTRN,
+ dENDS,
+ dEND
+);
+
EncRRR = (rGA, rGB, rGC, rBC, rTP, rIX, rCC, rMC);
EncBBB = (Bit0, Bit1, Bit2, Bit3, Bit4, Bit5, Bit6, Bit7);
EncPPP = (pGA, pGB, pGC, pReserved, pTP);
);
EncAA = (Base, BaseOffset, BaseIndex, BaseIndexIncrement);
EncMM = (mGA, mGB, mGC, mPP);
+EncWidth = (widByte, widWord);
OperandType = (OpUnknown, OpRegister, OpPointer, OpImmediate, OpLocation,
OpMemory, OpBit, OpWidth, OpAbsent);
offset: Word;
);
OpBit: (bit: EncBBB);
- OpWidth: (width: Byte);
+ OpWidth: (width: EncWidth);
end;
InstructionRBP = (
rbpWid, { RbP encodes logical widths }
rbpXfer { RbP encodes 0b011 }
);
-InstructionWidth = (widByte, widWord);
{ | low order byte | high order byte | }
{ | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | }
{ | R/b/P | W B | A A | W | OPCODE | MM | }
o3: OperandType;
{ these parts are encoded when the above fields match }
op: Byte; { pre-shifted into the high 6-bits }
- w: InstructionWidth;
+ w: EncWidth;
wb: EncWB;
rbp: InstructionRBP;
end;
function ConsumeUntilComma(const s: string; var i: Byte): String;
procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);
+function ParseNumericExpression(const s: String; var i: Word): Boolean;
function ParseMnemonic(const m: String): Mnemonic;
function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
end;
end;
+procedure ParseWidth(const a: String; var o: Operand);
+begin
+ if a = '8' then begin
+ o.kind := OpWidth;
+ o.width := widByte;
+ end else if a = '16' then begin
+ o.kind := OpWidth;
+ o.width := widWord;
+ end;
+end;
+
function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
begin
o.Kind := OpUnknown;
OpLocation: ParseLocation(a, o);
OpMemory: ParseMemory(a, o);
OpBit: ParseBit(a, o);
+ OpWidth: ParseWidth(a, o);
end;
ParseOperand := o.Kind <> OpUnknown;
end;