Add code

2021-12-05 21:04:15 +01:00 · 2021-12-05 21:04:15 +01:00 · a802188106
commit a802188106
parent 752a41a527
12 changed files with 1063 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,264 @@
 ## Ignore Visual Studio temporary files, build results, and
 ## files generated by popular Visual Studio add-ons.
 # User-specific files
 *.suo
 *.user
 *.userosscache
 *.sln.docstates
 # User-specific files (MonoDevelop/Xamarin Studio)
 *.userprefs
 # Build results
 [Dd]ebug/
 [Dd]ebugPublic/
 [Rr]elease/
 [Rr]eleases/
 x64/
 x86/
 bld/
 [Bb]in/
 [Oo]bj/
 [Ll]og/
 # Visual Studio 2015 cache/options directory
 .vs/
 # Uncomment if you have tasks that create the project's static files in wwwroot
 #wwwroot/
 # MSTest test Results
 [Tt]est[Rr]esult*/
 [Bb]uild[Ll]og.*
 # NUNIT
 *.VisualState.xml
 TestResult.xml
 # Build Results of an ATL Project
 [Dd]ebugPS/
 [Rr]eleasePS/
 dlldata.c
 # DNX
 project.lock.json
 project.fragment.lock.json
 artifacts/
 *_i.c
 *_p.c
 *_i.h
 *.ilk
 *.meta
 *.obj
 *.pch
 *.pdb
 *.pgc
 *.pgd
 *.rsp
 *.sbr
 *.tlb
 *.tli
 *.tlh
 *.tmp
 *.tmp_proj
 *.log
 *.vspscc
 *.vssscc
 .builds
 *.pidb
 *.svclog
 *.scc
 # Chutzpah Test files
 _Chutzpah*
 # Visual C++ cache files
 ipch/
 *.aps
 *.ncb
 *.opendb
 *.opensdf
 *.sdf
 *.cachefile
 *.VC.db
 *.VC.VC.opendb
 # Visual Studio profiler
 *.psess
 *.vsp
 *.vspx
 *.sap
 # TFS 2012 Local Workspace
 $tf/
 # Guidance Automation Toolkit
 *.gpState
 # ReSharper is a .NET coding add-in
 _ReSharper*/
 *.[Rr]e[Ss]harper
 *.DotSettings.user
 # JustCode is a .NET coding add-in
 .JustCode
 # TeamCity is a build add-in
 _TeamCity*
 # DotCover is a Code Coverage Tool
 *.dotCover
 # NCrunch
 _NCrunch_*
 .*crunch*.local.xml
 nCrunchTemp_*
 # MightyMoose
 *.mm.*
 AutoTest.Net/
 # Web workbench (sass)
 .sass-cache/
 # Installshield output folder
 [Ee]xpress/
 # DocProject is a documentation generator add-in
 DocProject/buildhelp/
 DocProject/Help/*.HxT
 DocProject/Help/*.HxC
 DocProject/Help/*.hhc
 DocProject/Help/*.hhk
 DocProject/Help/*.hhp
 DocProject/Help/Html2
 DocProject/Help/html
 # Click-Once directory
 publish/
 # Publish Web Output
 *.[Pp]ublish.xml
 *.azurePubxml
 # TODO: Comment the next line if you want to checkin your web deploy settings
 # but database connection strings (with potential passwords) will be unencrypted
 #*.pubxml
 *.publishproj
 # Microsoft Azure Web App publish settings. Comment the next line if you want to
 # checkin your Azure Web App publish settings, but sensitive information contained
 # in these scripts will be unencrypted
 PublishScripts/
 # NuGet Packages
 *.nupkg
 # The packages folder can be ignored because of Package Restore
 **/packages/*
 # except build/, which is used as an MSBuild target.
 !**/packages/build/
 # Uncomment if necessary however generally it will be regenerated when needed
 #!**/packages/repositories.config
 # NuGet v3's project.json files produce more ignorable files
 *.nuget.props
 *.nuget.targets
 # Microsoft Azure Build Output
 csx/
 *.build.csdef
 # Microsoft Azure Emulator
 ecf/
 rcf/
 # Windows Store app package directories and files
 AppPackages/
 BundleArtifacts/
 Package.StoreAssociation.xml
 _pkginfo.txt
 # Visual Studio cache files
 # files ending in .cache can be ignored
 *.[Cc]ache
 # but keep track of directories ending in .cache
 !*.[Cc]ache/
 # Others
 ClientBin/
 ~$*
 *~
 *.dbmdl
 *.dbproj.schemaview
 *.jfm
 *.pfx
 *.publishsettings
 node_modules/
 orleans.codegen.cs
 # Since there are multiple workflows, uncomment next line to ignore bower_components
 # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
 #bower_components/
 # RIA/Silverlight projects
 Generated_Code/
 # Backup & report files from converting an old project file
 # to a newer Visual Studio version. Backup files are not needed,
 # because we have git ;-)
 _UpgradeReport_Files/
 Backup*/
 UpgradeLog*.XML
 UpgradeLog*.htm
 # SQL Server files
 *.mdf
 *.ldf
 # Business Intelligence projects
 *.rdl.data
 *.bim.layout
 *.bim_*.settings
 # Microsoft Fakes
 FakesAssemblies/
 # GhostDoc plugin setting file
 *.GhostDoc.xml
 # Node.js Tools for Visual Studio
 .ntvs_analysis.dat
 # Visual Studio 6 build log
 *.plg
 # Visual Studio 6 workspace options file
 *.opt
 # Visual Studio LightSwitch build output
 **/*.HTMLClient/GeneratedArtifacts
 **/*.DesktopClient/GeneratedArtifacts
 **/*.DesktopClient/ModelManifest.xml
 **/*.Server/GeneratedArtifacts
 **/*.Server/ModelManifest.xml
 _Pvt_Extensions
 # Paket dependency manager
 .paket/paket.exe
 paket-files/
 # FAKE - F# Make
 .fake/
 # JetBrains Rider
 .idea/
 *.sln.iml
 # CodeRush
 .cr/
 # Python Tools for Visual Studio (PTVS)
 __pycache__/
 *.pyc
 # Visual Studio Code configuration files
 .vscode/
--- a/FasterBase64.Benchmarks/FasterBase64.Benchmarks.csproj
+++ b/FasterBase64.Benchmarks/FasterBase64.Benchmarks.csproj
@ -0,0 +1,17 @@
 <Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="BenchmarkDotNet" Version="0.13.1" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\FasterBase64\FasterBase64.csproj" />
  </ItemGroup>
 </Project>
--- a/FasterBase64.Benchmarks/FasterFromBase64Benchmarks.cs
+++ b/FasterBase64.Benchmarks/FasterFromBase64Benchmarks.cs
@ -0,0 +1,37 @@
 using BenchmarkDotNet.Attributes;
 using System;
 /// <summary>
 /// Compares base64 decoding through <c>System.Convert.TryFromBase64Chars</c> (<see cref="Old"/>)
 /// with <c>FasterBase64.Convert.TryFromBase64Chars</c> (<see cref="New"/>).
 /// </summary>
 public class FasterFromBase64Benchmarks
 {
    // Start as empty arrays so the non-nullable fields are never null
    // (nullable reference types are enabled for this project); GlobalSetup replaces them.
    private byte[] bytes = Array.Empty<byte>();
    private char[] chars = Array.Empty<char>();
    /// <summary>Number of decoded bytes produced by one benchmark invocation.</summary>
    [Params(100, 1000, 10000)]
    public int N { get; set; }
    /// <summary>
    /// Builds the base64 input by encoding <see cref="N"/> pseudo-random bytes (fixed seed,
    /// so every run sees the same data) and allocates the destination buffer.
    /// </summary>
    /// <exception cref="InvalidOperationException">The reference encoder could not fill the input buffer.</exception>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var originalBytes = new byte[N];
        var random = new Random(1);
        random.NextBytes(originalBytes);
        // Exact base64 length for N > 0 bytes: four characters per started group of three bytes.
        var resultSize = (1 + (N - 1) / 3) * 4;
        chars = new char[resultSize];
        // Fail fast instead of silently benchmarking an all-zero (invalid) input.
        if (!System.Convert.TryToBase64Chars(originalBytes, chars, out _))
        {
            throw new InvalidOperationException("Failed to prepare the base64 input for the benchmark.");
        }
        bytes = new byte[N];
    }
    /// <summary>Decodes with the .NET implementation; the last byte is returned so the result is observed.</summary>
    [Benchmark]
    public byte Old()
    {
        System.Convert.TryFromBase64Chars(chars, bytes, out _);
        return bytes[^1];
    }
    /// <summary>Decodes with the FasterBase64 implementation; the last byte is returned so the result is observed.</summary>
    [Benchmark]
    public byte New()
    {
        FasterBase64.Convert.TryFromBase64Chars(chars, bytes, out _);
        return bytes[^1];
    }
 }
--- a/FasterBase64.Benchmarks/FasterToBase64Benchmarks.cs
+++ b/FasterBase64.Benchmarks/FasterToBase64Benchmarks.cs
@ -0,0 +1,35 @@
 using BenchmarkDotNet.Attributes;
 using System;
 /// <summary>
 /// Compares base64 encoding through <c>System.Convert.TryToBase64Chars</c> (<see cref="Old"/>)
 /// with <c>FasterBase64.Convert.TryToBase64Chars</c> (<see cref="New"/>).
 /// </summary>
 public class FasterToBase64Benchmarks
 {
    // Start as empty arrays so the non-nullable fields are never null
    // (nullable reference types are enabled for this project); GlobalSetup replaces them.
    private byte[] bytes = Array.Empty<byte>();
    private char[] chars = Array.Empty<char>();
    /// <summary>Number of input bytes encoded by one benchmark invocation.</summary>
    [Params(100, 1000, 10000)]
    public int N { get; set; }
    /// <summary>
    /// Allocates the input and output buffers and fills the input with pseudo-random bytes
    /// (fixed seed, so every run sees the same data).
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        bytes = new byte[N];
        // Exact base64 length for N > 0 bytes: four characters per started group of three bytes.
        var resultSize = (1 + (N - 1) / 3) * 4;
        chars = new char[resultSize];
        var random = new Random(1);
        random.NextBytes(bytes);
    }
    /// <summary>Encodes with the .NET implementation; the last char is returned so the result is observed.</summary>
    [Benchmark]
    public char Old()
    {
        System.Convert.TryToBase64Chars(bytes, chars, out _);
        return chars[^1];
    }
    /// <summary>Encodes with the FasterBase64 implementation; the last char is returned so the result is observed.</summary>
    [Benchmark]
    public char New()
    {
        FasterBase64.Convert.TryToBase64Chars(bytes, chars, out _);
        return chars[^1];
    }
 }
--- a/FasterBase64.Benchmarks/Program.cs
+++ b/FasterBase64.Benchmarks/Program.cs
@ -0,0 +1,9 @@
 using BenchmarkDotNet.Running;
 /// <summary>Entry point of the benchmark executable.</summary>
 public class Program
 {
    /// <summary>
    /// Runs both benchmark suites of this project: encoding first, then decoding.
    /// Previously only the encoding suite was executed, so the decoding
    /// benchmarks were compiled but never run.
    /// </summary>
    public static void Main()
    {
        BenchmarkRunner.Run<FasterToBase64Benchmarks>();
        BenchmarkRunner.Run<FasterFromBase64Benchmarks>();
    }
 }
--- a/FasterBase64.Tests/ConvertTests.cs
+++ b/FasterBase64.Tests/ConvertTests.cs
@ -0,0 +1,217 @@
 using FluentAssertions;
 using FsCheck.Xunit;
 using System;
 using System.Collections.Generic;
 using Xunit;
 namespace FasterBase64.Tests
 {
    /// <summary>
    /// Checks <c>FasterBase64.Convert</c> against <c>System.Convert</c>, which is used
    /// as the reference implementation for every expectation in this class.
    /// </summary>
    public class ConvertTests
    {
        /// <summary>
        /// Property test: for generated input, encoding must report the same success flag,
        /// the same character count and the same characters as the reference implementation.
        /// </summary>
        [Property(MaxTest = 1000)]
        public void TestTryToBase64CharsRandom(byte[] bytes)
        {
            var charsLength = GetExactLengthInChars(bytes.Length);
            var expectedChars = new char[charsLength];
            var expected = System.Convert.TryToBase64Chars(bytes, expectedChars, out var expectedCharsWritten);
            var actualChars = new char[charsLength];
            var actual = FasterBase64.Convert.TryToBase64Chars(bytes, actualChars, out var actualCharsWritten);
            actual.Should().Be(expected);
            actualCharsWritten.Should().Be(expectedCharsWritten);
            // Compare the complete encoded output. The encoded text is longer than the input,
            // so bounding the comparison by the input length would leave its tail unchecked.
            actualChars.Should().Equal(expectedChars);
        }
        /// <summary>Encoding into a destination of exactly the required size succeeds and matches the reference output.</summary>
        [Theory]
        [MemberData(nameof(Base64Pairs))]
        public void TestTryToBase64CharsExactSize(byte[] bytes, char[] chars)
        {
            var actualChars = new char[chars.Length];
            var actual = FasterBase64.Convert.TryToBase64Chars(bytes, actualChars, out var charsWritten);
            actual.Should().BeTrue();
            charsWritten.Should().Be(chars.Length);
            actualChars.Should().Equal(chars);
        }
        /// <summary>Decoding into a destination of exactly the required size succeeds and restores the original bytes.</summary>
        [Theory]
        [MemberData(nameof(Base64Pairs))]
        public void TestTryFromBase64CharsExactSize(byte[] bytes, char[] chars)
        {
            var actualBytes = new byte[bytes.Length];
            var actual = FasterBase64.Convert.TryFromBase64Chars(chars, actualBytes, out int bytesWritten);
            actual.Should().BeTrue();
            bytesWritten.Should().Be(bytes.Length);
            actualBytes.Should().Equal(bytes);
        }
        /// <summary>
        /// Encoding into a destination of arbitrary size (too small, exact or too large) must leave
        /// the same result flag, character count and buffer contents as the reference implementation.
        /// </summary>
        [Theory]
        [MemberData(nameof(TryToBase64CharsWrongSizeTestData))]
        public void TestTryToBase64CharsWrongSize(byte[] bytes, bool expected, char[] chars, int charsWritten)
        {
            var actualChars = new char[chars.Length];
            var actual = FasterBase64.Convert.TryToBase64Chars(bytes, actualChars, out var actualCharsWritten);
            actual.Should().Be(expected);
            actualCharsWritten.Should().Be(charsWritten);
            actualChars.Should().Equal(chars);
        }
        /// <summary>
        /// Decoding must fail, with the byte count reported by the reference implementation,
        /// when the destination is too small or the input contains an invalid character.
        /// </summary>
        [Theory]
        [MemberData(nameof(TryFromBase64CharsWrongSizeTestData))]
        [MemberData(nameof(TryFromBase64CharsInvalidCharsTestData))]
        public void TestTryFromBase64CharsWrong(char[] chars, int n, int bytesWritten)
        {
            var actualBytes = new byte[n];
            var actual = FasterBase64.Convert.TryFromBase64Chars(chars, actualBytes, out var actualBytesWritten);
            actual.Should().BeFalse();
            actualBytesWritten.Should().Be(bytesWritten);
        }
        /// <summary>xUnit member data: (bytes, encoded chars) pairs produced by the reference encoder.</summary>
        public static IEnumerable<object[]> Base64Pairs()
        {
            foreach (var (bytes, chars) in TryToBase64CharsExactSizeTestDataTyped())
            {
                yield return new object[] { bytes, chars };
            }
        }
        /// <summary>xUnit member data for <see cref="TestTryToBase64CharsWrongSize"/>.</summary>
        public static IEnumerable<object[]> TryToBase64CharsWrongSizeTestData()
        {
            foreach (var (bytes, expected, chars, charsWritten) in TryToBase64CharsWrongSizeTestDataTyped())
            {
                yield return new object[] { bytes, expected, chars, charsWritten };
            }
        }
        /// <summary>xUnit member data for <see cref="TestTryFromBase64CharsWrong"/>: destinations that are too small.</summary>
        public static IEnumerable<object[]> TryFromBase64CharsWrongSizeTestData()
        {
            foreach (var (chars, n, bytesWritten) in TryFromBase64CharsWrongSizeTestDataTyped())
            {
                yield return new object[] { chars, n, bytesWritten };
            }
        }
        /// <summary>xUnit member data for <see cref="TestTryFromBase64CharsWrong"/>: inputs containing one invalid character.</summary>
        public static IEnumerable<object[]> TryFromBase64CharsInvalidCharsTestData()
        {
            foreach (var (chars, n, bytesWritten) in TryFromBase64CharsInvalidCharsTestDataTyped())
            {
                yield return new object[] { chars, n, bytesWritten };
            }
        }
        // Encodes every sample from TestBytes with the reference encoder into an exactly sized buffer.
        private static IEnumerable<(byte[] Bytes, char[] Chars)> TryToBase64CharsExactSizeTestDataTyped()
        {
            var random = new Random(1);
            foreach (var bytes in TestBytes(random))
            {
                var chars = new char[GetExactLengthInChars(bytes.Length)];
                System.Convert.TryToBase64Chars(bytes, chars, out _);
                yield return (bytes, chars);
            }
        }
        // Encodes every sample into a destination of random length (from 0 up to 9 chars more than
        // required) and records what the reference encoder reports and leaves in that buffer.
        private static IEnumerable<(byte[] Bytes, bool Expected, char[] Chars, int CharsWritten)> TryToBase64CharsWrongSizeTestDataTyped()
        {
            var random = new Random(1);
            foreach (var bytes in TestBytes(random))
            {
                var exactCharsLength = GetExactLengthInChars(bytes.Length);
                var chars = new char[random.Next(0, exactCharsLength + 10)];
                var expected = System.Convert.TryToBase64Chars(bytes, chars, out var charsWritten);
                yield return (bytes, expected, chars, charsWritten);
            }
        }
        // Pairs valid base64 text with a destination that is too small for it and records
        // the byte count the reference decoder reports for that failing call.
        private static IEnumerable<(char[] Chars, int N, int BytesWritten)> TryFromBase64CharsWrongSizeTestDataTyped()
        {
            var random = new Random(1);
            foreach (var bytes in TestBytes(random))
            {
                var n = bytes.Length;
                if (n == 0)
                {
                    // No destination can be too small for empty input.
                    continue;
                }
                var chars = new char[GetExactLengthInChars(n)];
                System.Convert.TryToBase64Chars(bytes, chars, out _);
                // Random.Next excludes its upper bound, so this covers every too-small size 0..n-1.
                var wrongN = random.Next(0, n);
                var wrongBytes = new byte[wrongN];
                System.Convert.TryFromBase64Chars(chars, wrongBytes, out var bytesWritten);
                yield return (chars, wrongN, bytesWritten);
            }
        }
        // Replaces one character of valid base64 text with a character outside the base64
        // alphabet and records the byte count the reference decoder reports for the result.
        private static IEnumerable<(char[] Chars, int N, int BytesWritten)> TryFromBase64CharsInvalidCharsTestDataTyped()
        {
            var random = new Random(1);
            var invalidChars = " \n\r\t!@#$%^&*()_\\\x0430\x0410".ToCharArray();
            foreach (var bytes in TestBytes(random))
            {
                var n = bytes.Length;
                if (n == 0)
                {
                    // Empty input has no character to corrupt.
                    continue;
                }
                var charsLength = GetExactLengthInChars(n);
                var chars = new char[charsLength];
                System.Convert.TryToBase64Chars(bytes, chars, out _);
                // Random.Next excludes its upper bound: pass the full lengths so that the last
                // position and the last invalid character can be selected as well.
                var wrongIndex = random.Next(0, charsLength);
                var wrongChar = invalidChars[random.Next(0, invalidChars.Length)];
                chars[wrongIndex] = wrongChar;
                var wrongBytes = new byte[n];
                System.Convert.TryFromBase64Chars(chars, wrongBytes, out var bytesWritten);
                yield return (chars, n, bytesWritten);
            }
        }
        // Number of base64 characters (padding included) needed to encode lengthInBytes bytes.
        private static int GetExactLengthInChars(int lengthInBytes)
        {
            return lengthInBytes == 0 ? 0 : (1 + (lengthInBytes - 1) / 3) * 4;
        }
        // Sample inputs: the empty array, two single-byte edge values, then ten random arrays
        // for every length from 1 to 128.
        private static IEnumerable<byte[]> TestBytes(Random random)
        {
            yield return new byte[] { };
            yield return new byte[] { 0 };
            yield return new byte[] { 255 };
            for (var n = 1; n <= 128; n++)
            {
                for (var iteration = 1; iteration <= 10; iteration++)
                {
                    var bytes = new byte[n];
                    random.NextBytes(bytes);
                    yield return bytes;
                }
            }
        }
    }
 }
--- a/FasterBase64.Tests/FasterBase64.Tests.csproj
+++ b/FasterBase64.Tests/FasterBase64.Tests.csproj
@ -0,0 +1,31 @@
 <Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <Nullable>enable</Nullable>
    <IsPackable>false</IsPackable>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="FluentAssertions" Version="6.2.0" />
    <PackageReference Include="FsCheck.Xunit" Version="2.16.3" />
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.11.0" />
    <PackageReference Include="xunit" Version="2.4.1" />
    <PackageReference Include="xunit.runner.visualstudio" Version="2.4.3">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <PackageReference Include="coverlet.collector" Version="3.1.0">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\FasterBase64\FasterBase64.csproj" />
  </ItemGroup>
 </Project>
--- a/FasterBase64.sln
+++ b/FasterBase64.sln
@ -0,0 +1,37 @@
 Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 17
 VisualStudioVersion = 17.0.31912.275
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FasterBase64", "FasterBase64\FasterBase64.csproj", "{C6D9044D-8B5E-42E5-A699-ED8B212B6227}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FasterBase64.Tests", "FasterBase64.Tests\FasterBase64.Tests.csproj", "{CE48F6A6-19CB-4980-8EE2-5EA4415C8A2A}"
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FasterBase64.Benchmarks", "FasterBase64.Benchmarks\FasterBase64.Benchmarks.csproj", "{3F77301C-50A9-4B64-8BFF-2C97834788C7}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
 		Release|Any CPU = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{C6D9044D-8B5E-42E5-A699-ED8B212B6227}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{C6D9044D-8B5E-42E5-A699-ED8B212B6227}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{C6D9044D-8B5E-42E5-A699-ED8B212B6227}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{C6D9044D-8B5E-42E5-A699-ED8B212B6227}.Release|Any CPU.Build.0 = Release|Any CPU
 		{CE48F6A6-19CB-4980-8EE2-5EA4415C8A2A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{CE48F6A6-19CB-4980-8EE2-5EA4415C8A2A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{CE48F6A6-19CB-4980-8EE2-5EA4415C8A2A}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{CE48F6A6-19CB-4980-8EE2-5EA4415C8A2A}.Release|Any CPU.Build.0 = Release|Any CPU
 		{3F77301C-50A9-4B64-8BFF-2C97834788C7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{3F77301C-50A9-4B64-8BFF-2C97834788C7}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3F77301C-50A9-4B64-8BFF-2C97834788C7}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{3F77301C-50A9-4B64-8BFF-2C97834788C7}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {67B1AE7B-C332-4546-80DE-629D417D557E}
 	EndGlobalSection
 EndGlobal
--- a/FasterBase64/Convert_From.cs
+++ b/FasterBase64/Convert_From.cs
@ -0,0 +1,181 @@
 // The following code is based on
 // Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 instructions,
 // arXiv:1704.00605v5.
 // We process input data in chunks of 32 chars; every chunk becomes
 // 24 decoded bytes.
 // If there are leftovers that are shorter than 34 (!) characters, we process
 // them by calling the .NET implementation. The reason for the number 34
 // is that we don't want to deal with '=' padding characters that can
 // appear at the end of the string (up to 2 of them).
 // Note, however, that (in contrast to the .NET implementation)
 // we do not allow whitespace in the input.
 // If you know that your input might contain whitespace characters,
 // you should remove them first, and then call the conversion method.
 //
 // Processing consists of three steps:
 // 1. Take the next 32 characters and decode them from WTF-16 to 6-bit values.
 //    Here we also check that all characters in the input are valid.
 //    After this step, we have a vector of 32 bytes, each having a value
 //    between 0 and 63.
 // 2. Pack these 32 6-bit chunks into 24 bytes.
 // 3. Write the resulting bytes to the output location.
 // Below we describe the steps in more detail.
 //
 // FIRST STEP: decoding WTF-16 into 6-bit values.
 // We start by taking two blocks of 16 characters (32 bytes),
 // checking them for the values that do not fit into the first byte,
 // and pack 32 characters into 32 bytes. This involves crossing the
 // lanes.
 // Every byte is then split into high and low nibble.
 // A character is a valid base64 character if and only if
 // a) its high nibble H is 2, 3, 4, 5, 6, 7;
 // b) when H = 2, the low nibble is 11 or 15;
 //    when H = 3, the low nibble is in 0..9;
 //    when H = 4 or 6, the low nibble is non-zero;
 //    when H = 5 or 7, the low nibble is in 0..10.
 // This is exactly what happens in the code; see Appendix C
 // of [Muła, Lemire] for the detailed explanations.
 //
 // SECOND STEP: packing 32 6-bit chunks into 24 bytes.
 // To achieve that we need only four instructions; namely, we
 // a) use MultiplyAddAdjacent (on byte level) to pack the data
 //    within (16-bit) words;
 // b) use MultiplyAddAdjacent (on word level) to pack the data
 //    within (32-bit) double words;
 // c) use Shuffle to pack the data within 128-bit lanes;
 // d) use PermuteVar8x32 to pack data into first 24 bytes of
 //    our 32-byte vector. This, of course, involves crossing
 //    the lanes.
 // We demonstrate the first two steps on 32-bit double words:
 // For a), we multiply the bytes alternately with 0x40 and 0x01,
 // which results in shifts by 6 bits of every other byte.
 // Then the instruction adds together adjacent pairs of resulting
 // 16-bit integers. So, starting from
 // 00xxxxxx | 00yyyyyy | 00zzzzzz | 00tttttt
 // we multiply by
 // 01000000 | 00000001 | 01000000 | 00000001
 // to get intermediate results
 // 0000xxxxxx000000, 0000000000yyyyyy, 0000zzzzzz000000, 0000000000tttttt
 // and add them pairwise, so we get
 // 0000xxxxxxyyyyyy | 0000zzzzzztttttt
 // Rewriting this into bytes, we get
 // xxyyyyyy | 0000xxxx | zztttttt | 0000zzzz
 // Step b) is similar: we shift every other word by 12 bits.
 // So, we multiply by
 // 0001000000000000 | 0000000000000001
 // to get intermediate results
 // 00000000xxxxxxyyyyyy000000000000 and 00000000000000000000zzzzzztttttt
 // and add them pairwise, so we get
 // 00000000xxxxxxyyyyyyzzzzzztttttt
 // This is exactly what is needed to pack four 6-bit values into
 // three 8-bit values.
 // Step c) then shuffles the bytes to pack the data bytes at the
 // beginning of each lane; step d) permutes them to move everything
 // into the first 24 bytes.
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 namespace FasterBase64
 {
    public static partial class Convert
    {
        /// <summary>
        /// Tries to decode the base64 text in <paramref name="chars"/> into <paramref name="bytes"/>.
        /// When AVX2 is available, input is consumed in blocks of 32 characters (24 decoded bytes each)
        /// while at least 34 characters and 32 bytes of destination remain; whatever is left over is
        /// handed to <c>System.Convert.TryFromBase64Chars</c>.
        /// </summary>
        /// <param name="chars">The base64 text. The vectorised path rejects every character outside
        /// the base64 alphabet, including whitespace and '=' padding.</param>
        /// <param name="bytes">The destination buffer. Each vector store writes 32 bytes although only
        /// 24 of them are decoded data, so up to 8 bytes beyond the reported count may be overwritten,
        /// and the buffer may be partially written even when the method returns <c>false</c>.</param>
        /// <param name="bytesWritten">The total number of decoded bytes on success; 0 on failure.</param>
        /// <returns><c>true</c> if the whole input was decoded; otherwise <c>false</c>.</returns>
        public static unsafe bool TryFromBase64Chars(
            ReadOnlySpan<char> chars,
            Span<byte> bytes,
            out int bytesWritten)
        {
            var inputLength = chars.Length;
            var outputLength = bytes.Length;
            bytesWritten = 0;
            // The AVX2 intrinsics throw PlatformNotSupportedException on hardware without AVX2;
            // in that case skip the vector path and let the .NET implementation decode everything.
            if (Avx2.IsSupported && inputLength >= 34)
            {
                // Selects the high byte of every 16-bit char; any set bit means the char is above 0xFF.
                var utf8mask = Vector256.Create((ushort)0xff00).AsInt16();
                var const2f = Vector256.Create((byte) 0x2f);
                // Lookup tables for validation and translation; see Appendix C of [Muła, Lemire].
                var lutLo = Vector256.Create(
                    (byte)21, 17, 17, 17, 17, 17, 17, 17, 17, 17, 19, 26, 27, 27, 27, 26,
                    21, 17, 17, 17, 17, 17, 17, 17, 17, 17, 19, 26, 27, 27, 27, 26);
                var lutHi = Vector256.Create(
                    (byte)16, 16, 1, 2, 4, 8, 4, 8, 16, 16, 16, 16, 16, 16, 16, 16,
                    16, 16, 1, 2, 4, 8, 4, 8, 16, 16, 16, 16, 16, 16, 16, 16);
                var lutRoll = Vector256.Create(
                    (sbyte)0, 16, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 16, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0);
                // Multipliers 0x40, 0x01 per byte pair: merge two 6-bit values into one 12-bit value.
                var helper1 = Vector256.Create(0x01400140).AsSByte();
                // Multipliers 0x1000, 0x0001 per word pair: merge two 12-bit values into one 24-bit value.
                var helper2 = Vector256.Create(0x00011000);
                // Byte shuffle that moves the three data bytes of every double word to the front of each lane.
                var helper3 = Vector256.Create(
                    (sbyte)2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
                    2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
                // Cross-lane permutation that gathers the data into the first 24 bytes of the vector.
                var helper4 = Vector256.Create(0, 1, 2, 4, 5, 6, -1, -1);
                fixed (byte* bytesPtr = bytes)
                fixed (short* charsPtr = MemoryMarshal.Cast<char, short>(chars))
                {
                    var currentBytesPtr = bytesPtr;
                    var currentInputPtr = charsPtr;
                    // 34 rather than 32 input chars: at least two chars are always left for the
                    // fallback, so '=' padding never reaches the vector path.
                    // 32 rather than 24 output bytes: every store below writes a full 32-byte vector.
                    while (inputLength >= 34 && outputLength >= 32)
                    {
                        var input1 = Avx2.LoadVector256(currentInputPtr);
                        if (!Avx2.TestZ(input1, utf8mask))
                        {
                            bytesWritten = 0;
                            return false;
                        }
                        var input2 = Avx2.LoadVector256(currentInputPtr + 16);
                        if (!Avx2.TestZ(input2, utf8mask))
                        {
                            bytesWritten = 0;
                            return false;
                        }
                        currentInputPtr += 32;
                        inputLength -= 32;
                        // Narrow 2 x 16 chars to 32 bytes; the pack works per 128-bit lane,
                        // so the permute restores the original character order.
                        var packedInput = Avx2.PackUnsignedSaturate(input1, input2);
                        var input = Avx2.Permute4x64(packedInput.AsUInt64(), (byte)0b_11_01_10_00).AsByte();
                        var hiNibbles = Avx2.ShiftRightLogical(input.AsInt32(), 4).AsByte();
                        var loNibbles = Avx2.And(input, const2f);
                        var lo = Avx2.Shuffle(lutLo, loNibbles);
                        var eq2f = Avx2.CompareEqual(input, const2f);
                        hiNibbles = Avx2.And(hiNibbles, const2f);
                        var hi = Avx2.Shuffle(lutHi, hiNibbles);
                        var roll = Avx2.Shuffle(lutRoll, Avx2.Add(eq2f, hiNibbles).AsSByte());
                        // A common bit in the two lookups marks a character outside the base64 alphabet.
                        if (!Avx2.TestZ(lo, hi))
                        {
                            bytesWritten = 0;
                            return false;
                        }
                        // Translate the ASCII codes to 6-bit values, then pack 32 x 6 bits into 24 bytes.
                        var fromAscii = Avx2.Add(input.AsSByte(), roll);
                        var mergeXYandZT = Avx2.MultiplyAddAdjacent(fromAscii.AsByte(), helper1);
                        var packedWithinLanes = Avx2.MultiplyAddAdjacent(mergeXYandZT, helper2.AsInt16());
                        packedWithinLanes = Avx2.Shuffle(packedWithinLanes.AsByte(), helper3.AsByte()).AsInt32();
                        var final = Avx2.PermuteVar8x32(packedWithinLanes, helper4).AsByte();
                        // Writes 32 bytes; only the first 24 are data, and the pointer advances by 24.
                        Avx2.Store(currentBytesPtr, final);
                        bytesWritten += 24;
                        currentBytesPtr += 24;
                        outputLength -= 24;
                    }
                }
            }
            // Decode the remaining characters (all of them when the vector path was not taken)
            // with the .NET implementation.
            var result = System.Convert.TryFromBase64Chars(chars[^inputLength..], bytes[bytesWritten..], out var bytesWritten2);
            if (result)
            {
                bytesWritten += bytesWritten2;
            }
            else
            {
                bytesWritten = 0;
            }
            return result;
        }
    }
 }
--- a/FasterBase64/Convert_To.cs
+++ b/FasterBase64/Convert_To.cs
@ -0,0 +1,188 @@
 // The following code is based on
 // Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 instructions,
 // arXiv:1704.00605v5.
 // We process input data in chunks of 24 bytes; every chunk becomes
 // 32 encoded characters.
 // If there are leftovers that are smaller than 24 bytes, we process
 // them by calling the .NET implementation.
 // The main processing routine requires that input 24 bytes are located
 // in the middle 24 bytes of a 32-byte vector that we load from memory.
 // Due to that, special handling is required for the first chunk
 // (we have to load first 32 bytes and shift it right by 4 bytes).
 // Processing consists of three steps:
 // 1. Split 24 input bytes into 32 6-bit chunks,
 //    each chunk stored into the lower 6 bits of consecutive bytes.
 //    This is where we need the data to be in the middle (and not the first)
 //    24 bytes: we don't want to cross the lanes.
 //    This way, the first 12 bytes stay in the first half of
 //    Vector256, and expand to fill the first half; same with the
 //    second half.
 // 2. Convert a sequence of 32 bytes (with possible values from 0 to 63)
 //    to their ASCII representations in base64.
 // 3. Store the resulting 32 bytes into the low bytes of 32 words
 //    at the output location.
 // Below we describe the steps in more detail.
 //
 // FIRST STEP: splitting into 6-bit chunks.
 // Here we describe the procedure for the first half (containing
 // the first 12 bytes of the input); the second half is processed
 // at the same time in a symmetrical fashion.
 // We split these 12 bytes into 4 chunks of 3 bytes each.
 // Our target is to re-shuffle these 24 bits into 4 chunks
 // of 6 bits each, and write them into lower bits of 4
 // consecutive bytes:
 // xxxxxxxx | yyyyyyyy | zzzzzzzz
 // ->
 // 00xxxxxx | 00xxyyyy | 00yyyyzz | 00zzzzzz
 // First, we shuffle these 3 bytes into 4 bytes (repeating
 // the middle byte) in the following way:
 // xxxxxxxx | yyyyyyyy | zzzzzzzz              // inputVector
 // ->
 // yyyyyyyy | xxxxxxxx | zzzzzzzz | yyyyyyyy   // inputWithRepeat
 // Then, we AND the result with 0x0fc0fc00 to extract both
 // the "xxxxxx" and "yyyyzz" parts:
 // 00000000 | 11111100 | 11000000 | 00001111   // 0x0fc0fc00
 // 00000000 | xxxxxx00 | zz000000 | 0000yyyy   // masked1
 // Another AND (with 0x003f03f0) extracts the "xxyyyy" and "zzzzzz" parts:
 // 11110000 | 00000011 | 00111111 | 00000000   // 0x003f03f0
 // yyyy0000 | 000000xx | 00zzzzzz | 00000000   // masked2
 // Multiplication shifts these parts into their proper place
 // (note that we use multiplication with storing high words
 // to effectively achieve right shift; and multiplication with
 // storing low words to effectively achieve left shift).
 // To understand how multiplication works, we need to rewrite
 // our data as words:
 // xxxxxx0000000000 | 0000yyyyzz000000         // masked1
 // 0000000001000000 | 0000010000000000         // shift1
 // 0000000000xxxxxx | 0000000000yyyyzz         // maskedAndShifted1
 // Similarly, for the second part:
 // 000000xxyyyy0000 | 0000000000zzzzzz         // masked2
 // 0000000000010000 | 0000000100000000         // shift2
 // 00xxyyyy00000000 | 00zzzzzz00000000         // maskedAndShifted2
 // After rewriting it back to bytes, we get
 // 00xxxxxx | 00000000 | 00yyyyzz | 00000000   // maskedAndShifted1
 // 00000000 | 00xxyyyy | 00000000 | 00zzzzzz   // maskedAndShifted2
 // Final result is OR of these two.
 //
 // SECOND STEP: encoding 6-bit values in base64.
 // Recall that base64 maps
 //  0..25 -> A..Z  (ASCII codes 65..90)
 // 26..51 -> a..z (ASCII codes 97..122)
 // 52..61 -> 0..9 (ASCII codes 48..57)
 // 62     -> + (ASCII code 43)
 // 63     -> / (ASCII code 47)
 // Thus our job is to add the following offsets:
 // 65 to values 0..25
 // 71 to values 26..51
 // -4 to values 52..61
 // -19 to value 62
 // -16 to value 63
 // First, we subtract 51 with saturation, so 0..25 and 26..51 become zero,
 // while values 52..63 become 1..12.
 // We have an offset map that says that 2..11 should map to -4,
 // 12 should map to -19, and 13 should map to -16.
 // Thus, the values 0..25 currently map to 0, while
 // the mapping for all other values is off by 1.
 // Now we need to add 1 in cases 26..63, and add 0 in cases 0..25.
 // This is done by comparing the values with 25:
 // if a value is greater than 25, we get -1, otherwise 0.
 // This is (up to a sign) what we wanted, so we can just
 // subtract the comparison result to achieve the required +1.
 // It remains to apply the offset map and add the resulting
 // offset to the input.
 //
 // THIRD STEP: store result to memory.
 // This is quite easy: store the lower half of a 32-byte vector,
 // interleaving bytes with zeros, then do the same for the higher half.
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 namespace FasterBase64
 {
    public static partial class Convert
    {
        /// <summary>
        /// Encodes <paramref name="bytes"/> as base64 text into <paramref name="chars"/>.
        /// Inputs of at least 32 bytes are encoded in 24-byte chunks with AVX2 when the
        /// instruction set is available; the remaining tail (and every input on hardware
        /// without AVX2) is delegated to <see cref="System.Convert.TryToBase64Chars"/>.
        /// </summary>
        /// <param name="bytes">The binary data to encode.</param>
        /// <param name="chars">The destination buffer for the base64 characters.</param>
        /// <param name="charsWritten">
        /// The number of characters written to <paramref name="chars"/>;
        /// 0 when the method returns <c>false</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> if the whole input was encoded; <c>false</c> if
        /// <paramref name="chars"/> is too short to hold the result.
        /// </returns>
        public static unsafe bool TryToBase64Chars(ReadOnlySpan<byte> bytes, Span<char> chars, out int charsWritten)
        {
            charsWritten = 0;
            var inputLength = bytes.Length;
            if (inputLength == 0)
            {
                return true;
            }
            // ceil(inputLength / 3) * 4, computed in 64 bits: in 32 bits the product wraps
            // for inputs above ~1.6 GB, which would let the unchecked vector stores below
            // run past the end of the destination buffer.
            var expectedLength = ((long)inputLength + 2) / 3 * 4;
            if (chars.Length < expectedLength)
            {
                return false;
            }
            // The vector path needs AVX2 and a full 32-byte load for the first chunk.
            if (Avx2.IsSupported && inputLength >= 32)
            {
                // Moves the first 24 input bytes to the middle of the vector (shift by one int).
                var permuter = Vector256.Create(0, 0, 1, 2, 3, 4, 5, 6);
                var mask1 = Vector256.Create(0x0fc0fc00).AsByte();
                var shift1 = Vector256.Create(0x04000040).AsUInt16();
                var mask2 = Vector256.Create(0x003f03f0).AsByte();
                var shift2 = Vector256.Create(0x01000010).AsUInt16();
                var const51 = Vector256.Create((byte)51);
                var const25 = Vector256.Create((byte)25);
                // Turns every 3 input bytes into 4 bytes (middle byte repeated), per lane.
                var shuffleVector = Vector256.Create(
                    (byte)5, 4, 6, 5, 8, 7, 9, 8, 11, 10, 12, 11, 14, 13, 15, 14,
                    1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10);
                // Offsets that turn a 6-bit value into its base64 ASCII code.
                var offsetMap = Vector256.Create(
                    (sbyte)65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
                    65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0).AsByte();
                fixed (byte* bytesPtr = bytes)
                fixed (short* charsPtr = MemoryMarshal.Cast<char, short>(chars))
                {
                    var currentOutputPtr = charsPtr;
                    // Every load after the first starts 4 bytes before its chunk, so that the
                    // 24 payload bytes sit in the middle of the vector; chunk k (k >= 1) is
                    // therefore loaded from offset 24 * k - 4, i.e. 20, 44, 68, ...
                    var nextLoadPtr = bytesPtr + 20;
                    // First chunk: load from the very start and shift right by 4 bytes instead.
                    var inputVector = Avx2.PermuteVar8x32(Avx2.LoadVector256(bytesPtr).AsInt32(), permuter).AsByte();
                    while (true)
                    {
                        // Step 1: split 24 bytes into 32 six-bit values.
                        var inputWithRepeat = Avx2.Shuffle(inputVector, shuffleVector);
                        var masked1 = Avx2.And(inputWithRepeat, mask1);
                        var maskedAndShifted1 = Avx2.MultiplyHigh(masked1.AsUInt16(), shift1);
                        var masked2 = Avx2.And(inputWithRepeat, mask2);
                        var maskedAndShifted2 = Avx2.MultiplyLow(masked2.AsUInt16(), shift2);
                        var shuffled = Avx2.Or(maskedAndShifted1, maskedAndShifted2).AsByte();
                        // Step 2: compute the offset-map index and translate to ASCII.
                        var shuffleResult = Avx2.SubtractSaturate(shuffled, const51);
                        var greaterThan25 = Avx2.CompareGreaterThan(shuffled.AsSByte(), const25.AsSByte()).AsByte();
                        // The comparison yields -1 where the value exceeds 25; subtracting adds 1.
                        shuffleResult = Avx2.Subtract(shuffleResult, greaterThan25);
                        var offsets = Avx2.Shuffle(offsetMap, shuffleResult);
                        var translated = Avx2.Add(offsets, shuffled);
                        // Step 3: widen each ASCII byte to a 16-bit char and store 2 x 16 chars.
                        Avx2.Store(currentOutputPtr, Avx2.ConvertToVector256Int16(translated.GetLower()));
                        currentOutputPtr += 16;
                        Avx2.Store(currentOutputPtr, Avx2.ConvertToVector256Int16(translated.GetUpper()));
                        currentOutputPtr += 16;
                        inputLength -= 24;
                        // The next load reads the 24 payload bytes plus 4 bytes after them,
                        // so at least 28 unread bytes must remain.
                        if (inputLength < 28)
                        {
                            break;
                        }
                        inputVector = Avx2.LoadVector256(nextLoadPtr);
                        nextLoadPtr += 24;
                    }
                    charsWritten = (int)(currentOutputPtr - charsPtr);
                }
            }
            // Encode whatever the vector loop left over (or everything, if it did not run).
            var result = System.Convert.TryToBase64Chars(bytes[^inputLength..], chars[charsWritten..], out var tailCharsWritten);
            if (result)
            {
                charsWritten += tailCharsWritten;
            }
            else
            {
                // Same failure contract as TryFromBase64Chars: report nothing written.
                charsWritten = 0;
            }
            return result;
        }
    }
 }
--- a/FasterBase64/FasterBase64.csproj
+++ b/FasterBase64/FasterBase64.csproj
@ -0,0 +1,10 @@
 <Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>
 </Project>
--- a/README.md
+++ b/README.md
@ -1,2 +1,37 @@
-# fasterbase64
+# FasterBase64
-Base64 encoding/decoding in C#
+
 An implementation of a fast base64 encoding algorithm in C#.
 The algorithm is described in
 [Faster Base64 Encoding and Decoding Using AVX2 Instructions](https://arxiv.org/abs/1704.00605)
 by Wojciech Muła and Daniel Lemire.
 Benchmarks show that this implementation is about 8-10 times faster
 than the standard .NET implementation
 in `System.Convert.TryToBase64Chars()` and `System.Convert.TryFromBase64Chars()`.
 We provide re-implementations of these two methods
 within a static class `FasterBase64.Convert`.
 * `FasterBase64.Convert.TryToBase64Chars()` works
  the same way as `System.Convert.TryToBase64Chars()`;
 * `FasterBase64.Convert.TryFromBase64Chars()` works
  the same way as `System.Convert.TryFromBase64Chars()`,
  if the input does not contain whitespace. The standard .NET implementation
  differs from the RFC4648 standard in that it allows whitespace in the data.
  If you need an implementation that skips whitespace, it is easy
  to copy the data, omitting whitespace, and then call
  `FasterBase64.Convert.TryFromBase64Chars()`.
  We believe that (in all reasonable cases) this is still faster than
  using the standard implementation.
 Caveats:
 * Although we believe the implementation to be reasonably well tested,
  there might still be bugs.
 * The implementation uses AVX2 instructions, but does not check if AVX2
  is available. AVX2 support can be easily checked by querying the property
  `System.Runtime.Intrinsics.X86.Avx2.IsSupported`.
 TODO:
 * NuGet package.
 License:
 * Copyright (c) Alexander Luzgarev, 2021, under the GPL license.