diff --git a/00_Common/dotnet/Directory.Build.props b/00_Common/dotnet/Directory.Build.props
new file mode 100644
index 00000000..f5a054dc
--- /dev/null
+++ b/00_Common/dotnet/Directory.Build.props
@@ -0,0 +1,8 @@ + + + + enable + 10.0 + + +
diff --git a/00_Common/dotnet/Games.Common.Test/Games.Common.Test.csproj b/00_Common/dotnet/Games.Common.Test/Games.Common.Test.csproj
index 6fe85680..c90ed518 100644
--- a/00_Common/dotnet/Games.Common.Test/Games.Common.Test.csproj
+++ b/00_Common/dotnet/Games.Common.Test/Games.Common.Test.csproj
@@ -2,12 +2,11 @@ net6.0 - enable - false +
diff --git a/00_Common/dotnet/Games.Common.Test/TextIOTests/TokenizerTests.cs b/00_Common/dotnet/Games.Common.Test/TextIOTests/TokenizerTests.cs
new file mode 100644
index 00000000..4e9c9e84
--- /dev/null
+++ b/00_Common/dotnet/Games.Common.Test/TextIOTests/TokenizerTests.cs
@@ -0,0 +1,55 @@
+using System;
+using FluentAssertions;
+using Xunit;
+
+namespace Games.Common.IO
+{
+    /// <summary>
+    /// Tests for <see cref="Tokenizer"/>.
+    /// </summary>
+    public class TokenizerTests
+    {
+        /// <summary>
+        /// Checks that <see cref="Tokenizer.ParseTokens(string)"/> yields the expected tokens in order.
+        /// </summary>
+        [Theory]
+        [MemberData(nameof(TokenizerTestCases))]
+        public void ParseTokens_SplitsStringIntoExpectedTokens(string input, string[] expected)
+        {
+            var result = Tokenizer.ParseTokens(input);
+
+            // Equal compares element by element in order; BeEquivalentTo would accept any order.
+            result.Should().Equal(expected);
+        }
+
+        /// <summary>
+        /// Checks that a null input is rejected immediately, before any token is requested.
+        /// </summary>
+        [Fact]
+        public void ParseTokens_NullInput_ThrowsArgumentNullException()
+        {
+            Action act = () => Tokenizer.ParseTokens(null!);
+
+            act.Should().Throw<ArgumentNullException>();
+        }
+
+        /// <summary>
+        /// Inputs paired with the tokens each is expected to produce.
+        /// </summary>
+        public static TheoryData<string, string[]> TokenizerTestCases() => new()
+        {
+            { "", new[] { "" } },
+            { "aBc", new[] { "aBc" } },
+            { " Foo ", new[] { "Foo" } },
+            { " \" Foo \" ", new[] { " Foo " } },
+            { " \" Foo ", new[] { " Foo " } },
+            { "\"\"abc", new[] { "" } },
+            { "a\"\"bc", new[] { "a\"\"bc" } },
+            { "\"\"", new[] { "" } },
+            { ",", new[] { "", "" } },
+            { " foo ,bar", new[] { "foo", "bar" } },
+            { "\"\"bc,de", new[] { "", "de" } },
+            { "a\"b,\" c,d\"e, f ,,g", new[] { "a\"b", " c,d", "f", "", "g" } }
+        };
+    }
+}
\ No newline at end of file
diff --git a/00_Common/dotnet/Games.Common/Games.Common.csproj b/00_Common/dotnet/Games.Common/Games.Common.csproj
index 9f5c4f4a..d4c395e8 100644
--- a/00_Common/dotnet/Games.Common/Games.Common.csproj
+++ 
b/00_Common/dotnet/Games.Common/Games.Common.csproj
@@ -1,7 +1,7 @@ - netstandard2.0 + netstandard2.1
diff --git a/00_Common/dotnet/Games.Common/IO/Token.cs b/00_Common/dotnet/Games.Common/IO/Token.cs
new file mode 100644
index 00000000..3f8267cb
--- /dev/null
+++ b/00_Common/dotnet/Games.Common/IO/Token.cs
@@ -0,0 +1,50 @@
+using System.Text;
+
+namespace Games.Common.IO
+{
+    /// <summary>
+    /// Accumulates the characters of one token; trailing whitespace is left out of the resulting text.
+    /// </summary>
+    internal class Token
+    {
+        protected readonly StringBuilder _builder;
+        private int _trailingWhiteSpaceCount;
+
+        private Token()
+        {
+            _builder = new StringBuilder();
+        }
+
+        /// <summary>
+        /// Adds a character to the token and returns the same instance so calls can be chained.
+        /// </summary>
+        public Token Append(char character)
+        {
+            _builder.Append(character);
+
+            // Count the current run of trailing whitespace; any other character resets it.
+            _trailingWhiteSpaceCount = char.IsWhiteSpace(character) ? _trailingWhiteSpaceCount + 1 : 0;
+
+            return this;
+        }
+
+        /// <summary>Returns the accumulated text without its trailing whitespace.</summary>
+        public override string ToString() => _builder.ToString(0, _builder.Length - _trailingWhiteSpaceCount);
+
+        /// <summary>Creates an empty token whose text is trimmed of trailing whitespace.</summary>
+        public static Token Create() => new();
+
+        /// <summary>Creates an empty token that keeps every appended character.</summary>
+        public static Token CreateQuoted() => new QuotedToken();
+
+        public static implicit operator string(Token token) => token.ToString();
+
+        /// <summary>
+        /// A token whose text keeps trailing whitespace.
+        /// </summary>
+        internal class QuotedToken : Token
+        {
+            public override string ToString() => _builder.ToString();
+        }
+    }
+}
\ No newline at end of file
diff --git a/00_Common/dotnet/Games.Common/IO/Tokenizer.cs b/00_Common/dotnet/Games.Common/IO/Tokenizer.cs
new file mode 100644
index 00000000..9a5d43a4
--- /dev/null
+++ b/00_Common/dotnet/Games.Common/IO/Tokenizer.cs
@@ -0,0 +1,116 @@
+using System;
+using System.Collections.Generic;
+
+namespace Games.Common.IO
+{
+    /// <summary>
+    /// Splits a line of text into comma-separated tokens, with optional double-quoted tokens.
+    /// </summary>
+    internal class Tokenizer
+    {
+        private const char Quote = '"';
+        private const char Separator = ',';
+
+        private readonly string _input;
+
+        private Tokenizer(string input) => _input = input;
+
+        /// <summary>
+        /// Splits <paramref name="input"/> into tokens.
+        /// </summary>
+        /// <param name="input">The text to split.</param>
+        /// <returns>The tokens in input order; always at least one, possibly empty.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
+        public static IEnumerable<string> ParseTokens(string input)
+        {
+            // Validate here, not in the iterator, so the exception is thrown at the call site.
+            if (input is null) { throw new ArgumentNullException(nameof(input)); }
+
+            return new Tokenizer(input).ParseTokens();
+        }
+
+        private IEnumerable<string> ParseTokens()
+        {
+            // Each enumeration gets its own queue, so the sequence can be enumerated more than once.
+            var characters = new Queue<char>(_input);
+
+            while (true)
+            {
+                var (token, isLastToken) = Consume(characters);
+                yield return token;
+
+                if (isLastToken) { break; }
+            }
+        }
+
+        /// <summary>
+        /// Reads one token from the front of <paramref name="characters"/>.
+        /// </summary>
+        /// <param name="characters">The remaining characters; consumed characters are dequeued.</param>
+        /// <returns>
+        /// The token read, and <c>true</c> when the queue ran out before a separator ended the token.
+        /// </returns>
+        public (Token, bool) Consume(Queue<char> characters)
+        {
+            var token = Token.Create();
+            var state = ITokenizerState.LookForStartOfToken;
+
+            while (characters.TryDequeue(out var character))
+            {
+                (state, token) = state.Consume(character, token);
+                if (state is AtEndOfTokenState) { return (token, false); }
+            }
+
+            return (token, true);
+        }
+
+        /// <summary>A tokenizer state: consumes one character and returns the next state and token.</summary>
+        private interface ITokenizerState
+        {
+            public static ITokenizerState LookForStartOfToken { get; } = new LookForStartOfTokenState();
+
+            (ITokenizerState, Token) Consume(char character, Token token);
+        }
+
+        /// <summary>Before the token starts: leading whitespace is skipped.</summary>
+        private struct LookForStartOfTokenState : ITokenizerState
+        {
+            public (ITokenizerState, Token) Consume(char character, Token token) =>
+                character switch
+                {
+                    Separator => (new AtEndOfTokenState(), token),
+                    Quote => (new InQuotedTokenState(), Token.CreateQuoted()),
+                    _ when char.IsWhiteSpace(character) => (this, token),
+                    _ => (new InTokenState(), token.Append(character))
+                };
+        }
+
+        /// <summary>Inside an unquoted token: characters are appended until a separator.</summary>
+        private struct InTokenState : ITokenizerState
+        {
+            public (ITokenizerState, Token) Consume(char character, Token token) =>
+                character == Separator ? (new AtEndOfTokenState(), token) : (this, token.Append(character));
+        }
+
+        /// <summary>Inside a quoted token: characters, separators included, are appended until a quote.</summary>
+        private struct InQuotedTokenState : ITokenizerState
+        {
+            public (ITokenizerState, Token) Consume(char character, Token token) =>
+                character == Quote ? (new LookForSeparatorState(), token) : (this, token.Append(character));
+        }
+
+        /// <summary>After a closing quote: characters are discarded until a separator.</summary>
+        private struct LookForSeparatorState : ITokenizerState
+        {
+            public (ITokenizerState, Token) Consume(char character, Token token) =>
+                (character == Separator ? new AtEndOfTokenState() : this, token);
+        }
+
+        /// <summary>Marks a finished token; it is never asked to consume a character.</summary>
+        private struct AtEndOfTokenState : ITokenizerState
+        {
+            public (ITokenizerState, Token) Consume(char character, Token token) =>
+                throw new InvalidOperationException();
+        }
+    }
+}
\ No newline at end of file
diff --git a/00_Common/dotnet/Games.Common/_InternalsVisibleTo.cs b/00_Common/dotnet/Games.Common/_InternalsVisibleTo.cs
new file mode 100644
index 00000000..2ffc2ca3
--- /dev/null
+++ b/00_Common/dotnet/Games.Common/_InternalsVisibleTo.cs
@@ -0,0 +1,3 @@
+using System.Runtime.CompilerServices;
+
+[assembly:InternalsVisibleTo("Games.Common.Test")]
\ No newline at end of file