mirror of
https://github.com/coding-horror/basic-computer-games.git
synced 2025-12-22 23:26:40 -08:00
Add input tokenization
This commit is contained in:
90
00_Common/dotnet/Games.Common/IO/Tokenizer.cs
Normal file
90
00_Common/dotnet/Games.Common/IO/Tokenizer.cs
Normal file
@@ -0,0 +1,90 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Games.Common.IO
|
||||
{
|
||||
/// <summary>
/// Splits a line of input into tokens separated by <see cref="Separator"/>, where a token may
/// optionally be enclosed in <see cref="Quote"/> characters.
/// </summary>
/// <remarks>
/// The tokenizer is a small state machine: every character is handed to the current
/// <see cref="ITokenizerState"/>, which returns the next state together with the token built so far.
/// </remarks>
internal class Tokenizer
{
    private const char Quote = '"';
    private const char Separator = ',';

    // The raw input is stored (instead of a pre-filled queue) so that each enumeration of the
    // token sequence can start again from the first character.
    private readonly string _input;

    private Tokenizer(string input) => _input = input;

    /// <summary>Parses <paramref name="input"/> into its sequence of tokens.</summary>
    /// <param name="input">The text to tokenize.</param>
    /// <returns>
    /// A lazily evaluated sequence that always contains at least one token; an input that ends
    /// directly after a separator produces one further token following that separator.
    /// The sequence may be enumerated more than once and yields the same tokens each time.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public static IEnumerable<string> ParseTokens(string input)
    {
        // Validated here rather than inside the iterator so the exception is raised at call
        // time instead of being deferred until the first enumeration.
        if (input is null) { throw new ArgumentNullException(nameof(input)); }

        return new Tokenizer(input).ParseTokens();
    }

    /// <summary>Iterator that yields one token per call to <see cref="Consume"/>.</summary>
    private IEnumerable<string> ParseTokens()
    {
        // A fresh queue is built for every enumeration. Sharing one queue across enumerations
        // would leave it drained after the first pass, and any later pass would then yield
        // only a single empty token.
        var characters = new Queue<char>(_input);

        while (true)
        {
            var (token, isLastToken) = Consume(characters);

            // Relies on Token being implicitly convertible to string.
            yield return token;

            if (isLastToken) { break; }
        }
    }

    /// <summary>
    /// Removes characters from <paramref name="characters"/> until one token is complete.
    /// </summary>
    /// <param name="characters">The remaining input; consumed characters are dequeued.</param>
    /// <returns>
    /// The token that was read, and a flag that is <c>true</c> when the queue ran out before a
    /// separator ended the token (this was the last token) and <c>false</c> when a separator
    /// ended it (at least one more token follows).
    /// </returns>
    public (Token, bool) Consume(Queue<char> characters)
    {
        var token = Token.Create();
        var state = ITokenizerState.LookForStartOfToken;

        while (characters.TryDequeue(out var character))
        {
            (state, token) = state.Consume(character, token);
            if (state is AtEndOfTokenState) { return (token, false); }
        }

        return (token, true);
    }

    /// <summary>A state of the tokenizer's state machine.</summary>
    private interface ITokenizerState
    {
        /// <summary>The state in which the reading of every token begins.</summary>
        public static ITokenizerState LookForStartOfToken { get; } = new LookForStartOfTokenState();

        /// <summary>Processes one character.</summary>
        /// <param name="character">The character taken from the input.</param>
        /// <param name="token">The token as built so far.</param>
        /// <returns>The state to use for the next character and the resulting token.</returns>
        (ITokenizerState, Token) Consume(char character, Token token);
    }

    /// <summary>
    /// Initial state: skips whitespace until the first significant character decides how the
    /// token continues.
    /// </summary>
    private struct LookForStartOfTokenState : ITokenizerState
    {
        public (ITokenizerState, Token) Consume(char character, Token token) =>
            character switch
            {
                // A separator before any content ends the token as it stands.
                Separator => (new AtEndOfTokenState(), token),
                // An opening quote replaces the current token with one from Token.CreateQuoted().
                Quote => (new InQuotedTokenState(), Token.CreateQuoted()),
                // Leading whitespace is ignored.
                _ when char.IsWhiteSpace(character) => (this, token),
                // Any other character is the first character of an unquoted token.
                _ => (new InTokenState(), token.Append(character))
            };
    }

    /// <summary>
    /// Inside an unquoted token: every character, whitespace and quotes included, is appended
    /// until a separator is reached.
    /// </summary>
    private struct InTokenState : ITokenizerState
    {
        public (ITokenizerState, Token) Consume(char character, Token token) =>
            character == Separator ? (new AtEndOfTokenState(), token) : (this, token.Append(character));
    }

    /// <summary>
    /// Inside a quoted token: every character, separators included, is appended until the
    /// closing quote is reached.
    /// </summary>
    private struct InQuotedTokenState : ITokenizerState
    {
        public (ITokenizerState, Token) Consume(char character, Token token) =>
            character == Quote ? (new LookForSeparatorState(), token) : (this, token.Append(character));
    }

    /// <summary>
    /// After a closing quote: characters are discarded, leaving the token untouched, until a
    /// separator is reached.
    /// </summary>
    private struct LookForSeparatorState : ITokenizerState
    {
        public (ITokenizerState, Token) Consume(char character, Token token) =>
            (character == Separator ? new AtEndOfTokenState() : this, token);
    }

    /// <summary>
    /// Terminal marker state. The tokenizer stops feeding characters as soon as this state is
    /// reached, so being asked to consume one is treated as a programming error.
    /// </summary>
    private struct AtEndOfTokenState : ITokenizerState
    {
        public (ITokenizerState, Token) Consume(char character, Token token) =>
            throw new InvalidOperationException();
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user