/parse.pas
unit Parse;
interface

uses Opcode, Util, SymTable;

function Trim(var s: string): String;
procedure StripComments(var s: string);

procedure ConsumeWhitespace(const s: string; var i: Byte);
function ConsumeIdentifier(const s: String; var i: Byte): String;
function ConsumeUntilComma(const s: string; var i: Byte): String;
procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);

function ParseNumericExpression(const s: String; var i: Word): Boolean;
function ParseMnemonic(const m: String): Mnemonic;
function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
function MnemonicStr(const m: Mnemonic): string;
function ShortOperandStr(const ot: OperandType): string;

procedure ErrorAtLine(msg: string);
procedure WarnAtLine(msg: string);
procedure DieBadOperand(const a: String);

var currentLine: Integer;
    linebuf: string;

implementation

uses Compiler;

const
MnemonicTranslation: array[Mnemonic] of String = (
    'ADD', 'ADDB', 'ADDBI', 'ADDI', 'AND', 'ANDB', 'ANDBI', 'ANDI',
    'CALL', 'CLR', 'DEC', 'DECB', 'HLT', 'INC', 'INCB', 'JBT', 'JMCE',
    'JMCNE', 'JMP', 'JNBT', 'JNZ', 'JNZB', 'JZ', 'JZB', 'LCALL', 'LJBT',
    'LJMCE', 'LJMCNE', 'LJMP', 'LJNBT', 'LJNZ', 'LJNZB', 'LJZ', 'LJZB',
    'LPD', 'LPDI', 'MOV', 'MOVB', 'MOVBI', 'MOVI', 'MOVP', 'NOP', 'NOT',
    'NOTB', 'OR', 'ORB', 'ORBI', 'ORI', 'SETB', 'SINTR', 'TSL', 'WID',
    'XFER', 'Invalid'
);
ShortOperands: array[OperandType] of string = (
        'unknown[ERROR]',
        'REG',
        'PTRREG',
        'IMMED',
        'LOC',
        'MEM',
        'BIT',
        'WIDTH',
        ''
);

function IsWhitespace(c: Char): Boolean;
begin
    IsWhitespace := (c = ' ') OR (c = #9);
end;

function IsAlpha(c: Char): Boolean;
begin
    IsAlpha := ((c >= #$41) AND (c <= #$5A)) OR ((c >= #$61) AND (c <= #$7A));
end;

function IsNumeric(c: Char): Boolean;
begin
    IsNumeric := ((c >= #$30) AND (c <= #$39));
end;

function IsHexadecimal(c: Char): Boolean;
begin
    IsHexadecimal := ((c >= '0') AND (c <= '9')) OR ((c >= 'a') OR (c <= 'f'))
                  OR ((c >= 'A') AND (c <= 'F'));
end;

function Trim(var s: string): String;
var i, j: Byte;
begin
    for i := 1 to Length(s) do
        if NOT IsWhitespace(s[i]) then
            Break;
    if i = Length(s) then begin
        Trim := '';
        Exit;
    end;
    for j := Length(s) downto 1 do
        if NOT IsWhitespace(s[j]) then
            Break;
    s := Copy(s, i, j - i + 1);
    Trim := s;
end;

procedure StripComments(var s: string);
var i: Byte;
begin
    i := Pos(';', s);
    if i <> 0 then
        s[0] := Char(i - 1);
end;

procedure ConsumeWhitespace(const s: string; var i: Byte);
begin
    while (i <= Length(s)) AND IsWhitespace(s[i]) do
        Inc(i);
end;

function ConsumeIdentifier(const s: String; var i: Byte): String;
var j: Byte;
begin
    j := i;
    while (i <= Length(s)) AND (IsAlpha(s[i]) OR IsNumeric(s[i]) OR (s[i] = '_')) do
        Inc(i);
    ConsumeIdentifier := Copy(s, j, i - j);
end;

function ConsumeRegister(const s: String; var i: Byte): String;
begin
    ConsumeRegister := Copy(s, i, 2);
    i := i + 2;
end;

function ConsumeUntilComma(const s: string; var i: Byte): String;
var j: Byte;
begin
    j := i;
    while (i <= Length(s)) AND (s[i] <> ',') do
        Inc(i);
    ConsumeUntilComma := Copy(s, j, i - j);
end;

procedure ConsumeWhitespaceAndCommas(const s: string; var i: Byte);
begin
    while (i <= Length(s)) AND (IsWhitespace(s[i]) OR (s[i] = ',')) do
        Inc(i);
end;

function ParseIntLiteral(const s: String): Word;
var i, j: Integer;
    hex: Boolean;
    ss: String;
begin
    hex := False;
    for i := 1 to Length(s) do begin
        if (s[i] = 'h') OR (s[i] = 'H') then begin
            if i < Length(s) then begin
                ErrorAtLine('Trailing garbage in hex literal: ' + s);
                Halt(1);
            end;
            hex := True;
        end
        else if NOT (IsHexadecimal(s[i]) OR (s[i] = '-') OR (s[i] = '.')) then begin
            ErrorAtLine('Invalid literal: ' + s);
            Halt(1);
        end;
    end;
    if hex then
        ss := '$' + Copy(s, 0, i - 1)
    else
        ss := Copy(s, 1, i);
    Val(ss, j, i);
    if i <> 0 then begin
        ErrorAtLine('Invalid literal: ' + s);
        Halt(1);
    end;
    ParseIntLiteral := Word(j);
end;

function ParseNumericExpression(const s: String; var i: Word): Boolean;
begin
    { TODO: actually parse expressions }
    i := ParseIntLiteral(s);
    ParseNumericExpression := true;
end;

function ParseMnemonic(const m: String): Mnemonic;
var i: Mnemonic;
begin
    for i := Low(Mnemonic) to High(Mnemonic) do
        if m = MnemonicTranslation[i] then begin
            ParseMnemonic := i;
            Exit;
        end;
    { This mnemonic is invalid }
    ParseMnemonic := mInvalid;
end;

procedure ParseRegister(a: String; var o: Operand);
begin
    a := ToUpCase(a);
    if a = 'BC' then
        o.reg := rBC
    else if a = 'CC' then
        o.reg := rCC
    else if a = 'GA' then
        o.reg := rGA
    else if a = 'GB' then
        o.reg := rGB
    else if a = 'GC' then
        o.reg := rGC
    else if a = 'IX' then
        o.reg := rIX
    else if a = 'MC' then
        o.reg := rMC
    else if a = 'TP' then
        o.reg := rTP
    else begin
        o.Kind := OpUnknown;
        Exit;
    end;
    o.Kind := OpRegister;
end;

procedure ParsePointer(a: String; var o: Operand);
begin
    a := ToUpCase(a);
    if a = 'GA' then
        o.preg := pGA
    else if a = 'GB' then
        o.preg := pGB
    else if a = 'GC' then
        o.preg := pGC
    else if a = 'TP' then
        o.preg := pTP
    else begin
        o.Kind := OpUnknown;
        Exit;
    end;
    o.Kind := OpPointer;
end;

procedure ParseImmediate(a: String; var o: Operand);
var i: Byte;
    j: Word;
    w: String[32];
begin
    i := 1;
    if IsAlpha(a[1]) OR (a[1] = '_') then begin
        w := ToUpCase(ConsumeIdentifier(a, i));
        if FindSymbol(currentObj^.symbols, w, j) then
            o.immed := j
        else begin
            ErrorAtLine('Unknown symbol: ' + a);
            { DumpSymbols(currentObj^.symbols); }
            Halt(1);
        end;
    end
    else
        o.immed := ParseIntLiteral(a);
    o.Kind := OpImmediate;
end;

procedure ParseLocation(a: String; var o: Operand);
var i: Byte;
    l: String;
begin
    i := 1;
    { First try an identifier }
    l := ToUpCase(ConsumeIdentifier(a, i));
    if i < Length(a) + 1 then begin
        ErrorAtLine('Malformed location? `' + a + '`');
        Halt(1);
    end;
    if FindSymbol(currentObj^.symbols, l, o.loc) then begin
        o.Kind := OpLocation;
        Exit;
    end;
    { Then maybe it's a numeric expression }
    if ParseNumericExpression(a, o.loc) then
        o.Kind := OpLocation;
end;

procedure ParseMemory(a: String; var o: Operand);
var i: Byte;
    reg: String[2];
begin
    if a[1] <> '[' then
        Exit;

    i := 2;
    ConsumeWhitespace(a, i);
    reg := ToUpCase(Copy(a, i, 2));
    if reg = 'GA' then
        o.mm := mGA
    else if reg = 'GB' then
        o.mm := mGB
    else if reg = 'GC' then
        o.mm := mGC
    else if reg = 'PP' then
        o.mm := mPP
    else
        Exit;
    i := i + 2;

    ConsumeWhitespace(a, i);
    if (i <= Length(a)) AND (a[i] = ']') then begin
        Inc(i);
        ConsumeWhitespace(a, i);
        if (i <= Length(a)) AND (a[i] = '.') then begin
            Inc(i);
            ConsumeWhitespace(a, i);
            o.offset := ParseIntLiteral(Copy(a, i, Length(a) - i + 1));
            if o.offset > 255 then begin
                Write('Memory offset > 255: ');
                Exit;
            end;
            o.aa := BaseOffset;
        end
        else
            o.aa := Base
    end
    else if a[i] = '+' then begin
        Inc(i);
        ConsumeWhitespace(a, i);
        reg := ToUpCase(ConsumeRegister(a, i));
        if reg <> 'IX' then
            Exit;
        ConsumeWhitespace(a, i);
        if a[i] = '+' then
            o.aa := BaseIndexIncrement
        else if a[i] = ']' then
            o.aa := BaseIndex
        else
            Exit;
    end;
    o.Kind := OpMemory;
end;

procedure ParseBit(a: String; var o: Operand);
var i: Byte;
    code: Integer;
begin
    Val(a, i, code);
    if (code = 0) AND (i < 8) then begin
        o.Kind := OpBit;
        o.bit := EncBBB(i);
    end;
end;

procedure ParseWidth(const a: String; var o: Operand);
begin
    if a = '8' then begin
        o.kind := OpWidth;
        o.width := widByte;
    end else if a = '16' then begin
        o.kind := OpWidth;
        o.width := widWord;
    end;
end;

function ParseOperand(a: String; var o: Operand; const ot: OperandType): Boolean;
begin
    o.Kind := OpUnknown;
    case ot of
        OpRegister: ParseRegister(a, o);
        OpPointer: ParsePointer(a, o);
        OpImmediate: ParseImmediate(a, o);
        OpLocation: ParseLocation(a, o);
        OpMemory: ParseMemory(a, o);
        OpBit: ParseBit(a, o);
        OpWidth: ParseWidth(a, o);
    end;
    ParseOperand := o.Kind <> OpUnknown;
end;

function MnemonicStr(const m: Mnemonic): string;
begin
    MnemonicStr := MnemonicTranslation[m];
end;

function ShortOperandStr(const ot: OperandType): string;
begin
    ShortOperandStr := ShortOperands[ot];
end;

procedure ErrorAtLine(msg: string);
begin
    Writeln();
    Writeln('ERR: ', msg,' on line ', currentLine, ':');
    Writeln(linebuf);
end;

procedure WarnAtLine(msg: string);
begin
    Writeln();
    Writeln('WARN: ', msg,' on line ', currentLine, ':');
    Writeln(linebuf);
end;

procedure DieBadOperand(const a: String);
begin
    ErrorAtLine('Invalid operand: ' + a);
    {
    Write('  possible operands: ');
    for i := Low(OperandType) to High(OperandType) do
        if i in possibleOperands then
            Write(i, ' ');
    Writeln('');
    }
    Halt(1);
end;

end.