The code for ConvertReadStream is now much smaller, and also easier to understand. We can then take all the tokenizing code that used to live in ConvertReadStream and move it into a new subroutine - the GetToken function, which just does the recognizing and labelling of the individual tokens. In the process we also lose a huge number of repeated lines of code, as well as a number of subroutines such as HandleBorCom and HandleString.
// My GetToken routine
function TPasConversion.GetToken(Run: PChar; var aTokenState: TTokenState; var aTokenStr: string):PChar; begin
aTokenState := tsUnknown; aTokenStr := ''''''''; TokenPtr := Run; // Mark were we started
Case Run^ of
#13
:
begin
aTokenState := tsCRLF;
inc(Run, 2);
end;
#1..#9, #11, #12, #14..#32:
begin
while Run^ in [#1..#9, #11, #12, #14..#32] do inc(Run);
inc(Run); while Run^ in [''''A''''..''''Z'''', ''''a''''..''''z'''', ''''0''''..''''9'''', ''''_''''] do inc(Run); TokenLen:= Run - TokenPtr; SetString(aTokenStr, TokenPtr, TokenLen);
if IsKeyWord(aTokenStr) then
begin
if IsDirective(aTokenStr) then aTokenState:= tsDirective
else aTokenState:= tsKeyWord;
end;
end;
''''0''''..''''9'''':
begin
inc(Run);
aTokenState:= tsNumber; while Run^ in [''''0''''..''''9'''', ''''.'''', ''''e'''', ''''E''''] do inc(Run);
end;
''''{'''':
begin
FComment := csBor;
aTokenState := tsComment; while not ((Run^ = ''''}'''') or (Run^ = #0)) do inc(Run); inc(Run);
while Run^ in [''''!'''',''''"'''', ''''%'''', ''''&'''', ''''(''''..''''/'''', '''':''''..''''@'''', ''''[''''..''''^'''', ''''`'''', ''''~''''] do begin