From 716846b6a19f1fb2ba515992e2bf4c529d21d351 Mon Sep 17 00:00:00 2001
From: Alexander Luzgarev
Date: Sun, 30 Mar 2025 13:52:21 +0200
Subject: [PATCH] Improve writing of compressed data

---
 Review notes (text in this area is not part of the commit message):
 * ChecksumCalculatingStreamTests.Test now asserts expected == actual; it
   previously computed both values and compared nothing.
 * The reference Adler-32 loop in the test stops on a 0-byte read instead
   of on any short read.
 * ChecksumCalculatingStream throws NotSupportedException from the members
   it does not support, as the Stream contract specifies.
 * WriteCompressedVariable stages the element in memory when the target
   stream cannot seek, and restores the saved end position instead of
   seeking to the end of the stream.
 * NOTE(review): the XML markup of the .csproj hunks was lost before this
   review. Element names, the BenchmarkDotNet version and the
   InternalsVisibleTo entry are reconstructed - confirm them against the
   repository. Context lines with no recoverable text are omitted, so the
   MatFileHandler.csproj hunk has zero context (git apply --unidiff-zero).
   Blob ids on the index lines were not recomputed.

 Benchmarks/Benchmarks.csproj                  |  17 +++
 Benchmarks/Program.cs                         |  47 ++++++++
 .../ChecksumCalculatingStreamTests.cs         | 106 ++++++++++++++++++
 .../MatFileHandler.Tests.csproj               |   1 +
 MatFileHandler.sln                            |  10 +-
 MatFileHandler/ChecksumCalculatingStream.cs   |  91 +++++++++++++++
 MatFileHandler/MatFileHandler.csproj          |   3 +
 MatFileHandler/MatFileWriter.cs               |  88 +++++++--------
 8 files changed, 315 insertions(+), 48 deletions(-)
 create mode 100644 Benchmarks/Benchmarks.csproj
 create mode 100644 Benchmarks/Program.cs
 create mode 100644 MatFileHandler.Tests/ChecksumCalculatingStreamTests.cs
 create mode 100644 MatFileHandler/ChecksumCalculatingStream.cs

diff --git a/Benchmarks/Benchmarks.csproj b/Benchmarks/Benchmarks.csproj
new file mode 100644
index 0000000..400e6a5
--- /dev/null
+++ b/Benchmarks/Benchmarks.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\MatFileHandler\MatFileHandler.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/Benchmarks/Program.cs b/Benchmarks/Program.cs
new file mode 100644
index 0000000..fd5278d
--- /dev/null
+++ b/Benchmarks/Program.cs
@@ -0,0 +1,47 @@
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Running;
+using MatFileHandler;
+using System;
+using System.IO;
+
+namespace Benchmarks;
+
+[MemoryDiagnoser]
+public class BigWriteBenchmark
+{
+    private IMatFile? matFile;
+
+    [GlobalSetup]
+    public void GlobalSetup()
+    {
+        var m = 1000;
+        var n = 10000;
+        var builder = new DataBuilder();
+        var array = builder.NewArray<double>(m, n);
+        var random = new Random(1);
+        for (var i = 0; i < m * n; i++)
+        {
+            array[i] = random.NextDouble();
+        }
+
+        var variable = builder.NewVariable("test", array);
+        matFile = builder.NewFile(new[] { variable });
+    }
+
+    [Benchmark]
+    public void V1()
+    {
+        using var stream = new MemoryStream();
+        var writer = new MatFileWriter(stream);
+        writer.Write(matFile!);
+    }
+}
+
+internal class Program
+{
+    static void Main(string[] args)
+    {
+        Console.WriteLine("Hello, World!");
+        BenchmarkRunner.Run<BigWriteBenchmark>();
+    }
+}
diff --git a/MatFileHandler.Tests/ChecksumCalculatingStreamTests.cs b/MatFileHandler.Tests/ChecksumCalculatingStreamTests.cs
new file mode 100644
index 0000000..9548c2b
--- /dev/null
+++ b/MatFileHandler.Tests/ChecksumCalculatingStreamTests.cs
@@ -0,0 +1,106 @@
+// Copyright 2017-2018 Alexander Luzgarev
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Xunit;
+
+namespace MatFileHandler.Tests;
+
+/// <summary>
+/// Tests for the <see cref="ChecksumCalculatingStream"/> class.
+/// </summary>
+public class ChecksumCalculatingStreamTests
+{
+    /// <summary>
+    /// Test writing various things.
+    /// </summary>
+    /// <param name="action">Action that writes the test payload to a stream.</param>
+    [Theory]
+    [MemberData(nameof(TestData))]
+    public void Test(Action<Stream> action)
+    {
+        using var stream = new MemoryStream();
+        var sut = new ChecksumCalculatingStream(stream);
+        action(sut);
+        var actual = sut.GetCrc();
+        var expected = ReferenceCalculation(action);
+        Assert.Equal(expected, actual);
+    }
+
+    /// <summary>
+    /// Test data for <see cref="Test"/>.
+    /// </summary>
+    /// <returns>Test data.</returns>
+    public static IEnumerable<object[]> TestData()
+    {
+        foreach (var data in TestData_Typed())
+        {
+            yield return new object[] { data };
+        }
+    }
+
+    private static IEnumerable<Action<Stream>> TestData_Typed()
+    {
+        yield return BinaryWriterAction(w => w.Write(true));
+        yield return BinaryWriterAction(w => w.Write(false));
+        yield return BinaryWriterAction(w => w.Write(byte.MinValue));
+        yield return BinaryWriterAction(w => w.Write(byte.MaxValue));
+        yield return BinaryWriterAction(w => w.Write(short.MinValue));
+        yield return BinaryWriterAction(w => w.Write(short.MaxValue));
+        yield return BinaryWriterAction(w => w.Write(int.MinValue));
+        yield return BinaryWriterAction(w => w.Write(int.MaxValue));
+        yield return BinaryWriterAction(w => w.Write(long.MinValue));
+        yield return BinaryWriterAction(w => w.Write(long.MaxValue));
+        yield return BinaryWriterAction(w => w.Write(decimal.MinValue));
+        yield return BinaryWriterAction(w => w.Write(decimal.MaxValue));
+        yield return BinaryWriterAction(w => w.Write(double.MinValue));
+        yield return BinaryWriterAction(w => w.Write(double.MaxValue));
+        yield return BinaryWriterAction(w => w.Write(double.PositiveInfinity));
+        yield return BinaryWriterAction(w => w.Write(double.NaN));
+        yield return BinaryWriterAction(w => w.Write(new byte[] { 1, 2, 3, 4, 5, 6, 7}));
+        yield return BinaryWriterAction(w => w.Write(Enumerable.Range(0, 255).SelectMany(x => Enumerable.Range(0, 255)).Select(x => (byte)x).ToArray()));
+    }
+
+    private static Action<Stream> BinaryWriterAction(Action<BinaryWriter> action)
+    {
+        return stream =>
+        {
+            using var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true);
+            action(writer);
+        };
+    }
+
+    private static uint ReferenceCalculation(Action<Stream> action)
+    {
+        using var stream = new MemoryStream();
+        action(stream);
+        stream.Position = 0;
+        return CalculateAdler32Checksum(stream);
+    }
+
+    private static uint CalculateAdler32Checksum(Stream stream)
+    {
+        uint s1 = 1;
+        uint s2 = 0;
+        const uint bigPrime = 0xFFF1;
+        const int bufferSize = 2048;
+        var buffer = new byte[bufferSize];
+        while (true)
+        {
+            var bytesRead = stream.Read(buffer, 0, bufferSize);
+            for (var i = 0; i < bytesRead; i++)
+            {
+                s1 = (s1 + buffer[i]) % bigPrime;
+                s2 = (s2 + s1) % bigPrime;
+            }
+            if (bytesRead == 0)
+            {
+                break;
+            }
+        }
+        return (s2 << 16) | s1;
+    }
+}
diff --git a/MatFileHandler.Tests/MatFileHandler.Tests.csproj b/MatFileHandler.Tests/MatFileHandler.Tests.csproj
index 3f44415..f746f1e 100755
--- a/MatFileHandler.Tests/MatFileHandler.Tests.csproj
+++ b/MatFileHandler.Tests/MatFileHandler.Tests.csproj
@@ -1,4 +1,5 @@
 <Project Sdk="Microsoft.NET.Sdk">
   <PropertyGroup>
     <TargetFrameworks>net8.0;net472</TargetFrameworks>
+    <LangVersion>10.0</LangVersion>
     <IsPackable>false</IsPackable>
diff --git a/MatFileHandler.sln b/MatFileHandler.sln
index ad160eb..6b97b42 100755
--- a/MatFileHandler.sln
+++ b/MatFileHandler.sln
@@ -1,7 +1,7 @@
 
 Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.28621.142
+# Visual Studio Version 17
+VisualStudioVersion = 17.10.34928.147
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MatFileHandler", "MatFileHandler\MatFileHandler.csproj", "{C0CD11D3-016A-4FCD-AF0B-D745F79F3749}"
 EndProject
@@ -14,6 +14,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		README.md = README.md
 	EndProjectSection
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmarks", "Benchmarks\Benchmarks.csproj", "{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -28,6 +30,10 @@ Global
 		{4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Release|Any CPU.Build.0 = Release|Any CPU
+		{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/MatFileHandler/ChecksumCalculatingStream.cs b/MatFileHandler/ChecksumCalculatingStream.cs
new file mode 100644
index 0000000..0cd6835
--- /dev/null
+++ b/MatFileHandler/ChecksumCalculatingStream.cs
@@ -0,0 +1,91 @@
+// Copyright 2017-2018 Alexander Luzgarev
+
+using System;
+using System.IO;
+
+namespace MatFileHandler;
+
+/// <summary>
+/// A stream that calculates Adler32 checksum of everything
+/// written to it before passing to another stream.
+/// </summary>
+internal class ChecksumCalculatingStream : Stream
+{
+    private const uint BigPrime = 0xFFF1;
+    private readonly Stream _stream;
+    private uint s1 = 1;
+    private uint s2 = 0;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ChecksumCalculatingStream"/> class.
+    /// </summary>
+    /// <param name="stream">Wrapped stream.</param>
+    public ChecksumCalculatingStream(Stream stream)
+    {
+        _stream = stream;
+    }
+
+    /// <inheritdoc />
+    public override bool CanRead => false;
+
+    /// <inheritdoc />
+    public override bool CanSeek => false;
+
+    /// <inheritdoc />
+    public override bool CanWrite => true;
+
+    /// <inheritdoc />
+    public override long Length => throw new NotSupportedException();
+
+    /// <inheritdoc />
+    public override long Position
+    {
+        get => throw new NotSupportedException();
+        set => throw new NotSupportedException();
+    }
+
+    /// <inheritdoc />
+    public override void Flush()
+    {
+        _stream.Flush();
+    }
+
+    /// <inheritdoc />
+    public override int Read(byte[] buffer, int offset, int count)
+    {
+        throw new NotSupportedException();
+    }
+
+    /// <inheritdoc />
+    public override long Seek(long offset, SeekOrigin origin)
+    {
+        throw new NotSupportedException();
+    }
+
+    /// <inheritdoc />
+    public override void SetLength(long value)
+    {
+        throw new NotSupportedException();
+    }
+
+    /// <inheritdoc />
+    public override void Write(byte[] buffer, int offset, int count)
+    {
+        for (var i = offset; i < offset + count; i++)
+        {
+            s1 = (s1 + buffer[i]) % BigPrime;
+            s2 = (s2 + s1) % BigPrime;
+        }
+
+        _stream.Write(buffer, offset, count);
+    }
+
+    /// <summary>
+    /// Calculate the checksum of everything written to the stream so far.
+    /// </summary>
+    /// <returns>Checksum of everything written to the stream so far.</returns>
+    public uint GetCrc()
+    {
+        return (s2 << 16) | s1;
+    }
+}
diff --git a/MatFileHandler/MatFileHandler.csproj b/MatFileHandler/MatFileHandler.csproj
index 903b54f..0a668e0 100755
--- a/MatFileHandler/MatFileHandler.csproj
+++ b/MatFileHandler/MatFileHandler.csproj
@@ -46,0 +47,3 @@
+  <ItemGroup>
+    <InternalsVisibleTo Include="MatFileHandler.Tests" />
+  </ItemGroup>
diff --git a/MatFileHandler/MatFileWriter.cs b/MatFileHandler/MatFileWriter.cs
index 4f9a932..8ae8166 100755
--- a/MatFileHandler/MatFileWriter.cs
+++ b/MatFileHandler/MatFileWriter.cs
@@ -67,29 +67,6 @@ namespace MatFileHandler
             }
         }
 
-        private static uint CalculateAdler32Checksum(Stream stream)
-        {
-            uint s1 = 1;
-            uint s2 = 0;
-            const uint bigPrime = 0xFFF1;
-            const int bufferSize = 2048;
-            var buffer = new byte[bufferSize];
-            while (true)
-            {
-                var bytesRead = stream.Read(buffer, 0, bufferSize);
-                for (var i = 0; i < bytesRead; i++)
-                {
-                    s1 = (s1 + buffer[i]) % bigPrime;
-                    s2 = (s2 + s1) % bigPrime;
-                }
-                if (bytesRead < bufferSize)
-                {
-                    break;
-                }
-            }
-            return (s2 << 16) | s1;
-        }
-
         private void WriteHeader(BinaryWriter writer, Header header)
         {
             writer.Write(Encoding.UTF8.GetBytes(header.Text));
@@ -637,31 +614,50 @@ namespace MatFileHandler
 
+        /// <summary>
+        /// Writes a variable as an MiCompressed element: the bytes 0x78 0x9c, the
+        /// deflated variable, and the Adler-32 checksum of the uncompressed data.
+        /// </summary>
+        /// <param name="writer">Writer over the output stream.</param>
+        /// <param name="variable">Variable to compress and write.</param>
         private void WriteCompressedVariable(BinaryWriter writer, IVariable variable)
         {
-            using (var compressedStream = new MemoryStream())
+            if (!writer.BaseStream.CanSeek)
+            {
+                // The size in the tag is patched in after compression, which needs a
+                // seekable target, so stage the element in memory and copy it over.
+                using var stagingStream = new MemoryStream();
+                using (var stagingWriter = new BinaryWriter(stagingStream, Encoding.UTF8, leaveOpen: true))
+                {
+                    WriteCompressedVariable(stagingWriter, variable);
+                }
+
+                stagingStream.Position = 0;
+                stagingStream.CopyTo(writer.BaseStream);
+                return;
+            }
+
+            var position = writer.BaseStream.Position;
+            WriteTag(writer, new Tag(DataType.MiCompressed, 0));
+            writer.Write((byte)0x78);
+            writer.Write((byte)0x9c);
+            uint crc;
+            var before = writer.BaseStream.Position;
+            using (var compressionStream = new DeflateStream(writer.BaseStream, CompressionMode.Compress, leaveOpen: true))
             {
-                uint crc;
-                using (var originalStream = new MemoryStream())
-                {
-                    using (var internalWriter = new BinaryWriter(originalStream))
-                    {
-                        WriteVariable(internalWriter, variable);
-                        originalStream.Position = 0;
-                        crc = CalculateAdler32Checksum(originalStream);
-                        originalStream.Position = 0;
-                        using (var compressionStream =
-                            new DeflateStream(compressedStream, CompressionMode.Compress, leaveOpen: true))
-                        {
-                            originalStream.CopyTo(compressionStream);
-                        }
-                    }
-                }
-                compressedStream.Position = 0;
-                WriteTag(writer, new Tag(DataType.MiCompressed, (int)(compressedStream.Length + 6)));
-                writer.Write((byte)0x78);
-                writer.Write((byte)0x9c);
-                compressedStream.CopyTo(writer.BaseStream);
-                writer.Write(BitConverter.GetBytes(crc).Reverse().ToArray());
+                using var checksumStream = new ChecksumCalculatingStream(compressionStream);
+                using var internalWriter = new BinaryWriter(checksumStream, Encoding.UTF8, leaveOpen: true);
+                WriteVariable(internalWriter, variable);
+                crc = checksumStream.GetCrc();
             }
+
+            var after = writer.BaseStream.Position;
+            // The 2 header bytes and 4 checksum bytes count towards the element size.
+            var compressedLength = (int)(after - before) + 6;
+
+            writer.Write(BitConverter.GetBytes(crc).Reverse().ToArray());
+            var end = writer.BaseStream.Position;
+            writer.BaseStream.Position = position;
+            WriteTag(writer, new Tag(DataType.MiCompressed, compressedLength));
+            writer.BaseStream.Position = end;
         }
     }
 }
\ No newline at end of file