Improve writing of compressed data

This commit is contained in:
Alexander Luzgarev 2025-03-30 13:52:21 +02:00
parent f56508c3ad
commit 716846b6a1
8 changed files with 292 additions and 48 deletions

View File

@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MatFileHandler\MatFileHandler.csproj" />
</ItemGroup>
</Project>

47
Benchmarks/Program.cs Normal file
View File

@ -0,0 +1,47 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using MatFileHandler;
using System;
using System.IO;
namespace Benchmarks;
/// <summary>
/// Benchmark that writes a single large matrix of doubles to an in-memory stream.
/// </summary>
[MemoryDiagnoser]
public class BigWriteBenchmark
{
    private IMatFile? matFile;

    /// <summary>
    /// Builds the file that is written by the benchmark: one variable named "test"
    /// holding a 1000 x 10000 array filled from a fixed-seed random generator.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        const int rowCount = 1000;
        const int columnCount = 10000;

        var dataBuilder = new DataBuilder();
        var matrix = dataBuilder.NewArray<double>(rowCount, columnCount);

        // Fixed seed keeps the generated data identical between runs.
        var generator = new Random(1);
        var elementCount = rowCount * columnCount;
        var index = 0;
        while (index < elementCount)
        {
            matrix[index] = generator.NextDouble();
            index++;
        }

        matFile = dataBuilder.NewFile(new[] { dataBuilder.NewVariable("test", matrix) });
    }

    /// <summary>
    /// Writes the prepared file to a fresh <see cref="MemoryStream"/>.
    /// </summary>
    [Benchmark]
    public void V1()
    {
        using (var output = new MemoryStream())
        {
            new MatFileWriter(output).Write(matFile!);
        }
    }
}
/// <summary>
/// Entry point of the benchmark application.
/// </summary>
internal class Program
{
    /// <summary>
    /// Prints a greeting and runs <see cref="BigWriteBenchmark"/>.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    private static void Main(string[] args)
    {
        Console.WriteLine("Hello, World!");

        // The returned summary is not needed; the runner reports results itself.
        _ = BenchmarkRunner.Run<BigWriteBenchmark>();
    }
}

View File

@ -0,0 +1,105 @@
// Copyright 2017-2018 Alexander Luzgarev
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Xunit;
namespace MatFileHandler.Tests;
/// <summary>
/// Tests for the <see cref="ChecksumCalculatingStream"/> class.
/// </summary>
public class ChecksumCalculatingStreamTests
{
    /// <summary>
    /// Checks that the checksum reported by <see cref="ChecksumCalculatingStream"/> after
    /// <paramref name="action"/> has written to it equals an independently calculated
    /// Adler-32 checksum of the same bytes.
    /// </summary>
    /// <param name="action">Action that writes some data to the stream it is given.</param>
    [Theory]
    [MemberData(nameof(TestData))]
    public void Test(Action<Stream> action)
    {
        using var stream = new MemoryStream();
        var sut = new ChecksumCalculatingStream(stream);

        action(sut);
        var actual = sut.GetCrc();

        var expected = ReferenceCalculation(action);
        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// Test data for <see cref="Test"/>.
    /// </summary>
    /// <returns>Test data.</returns>
    public static IEnumerable<object[]> TestData()
    {
        foreach (var data in TestData_Typed())
        {
            yield return new object[] { data };
        }
    }

    private static IEnumerable<Action<Stream>> TestData_Typed()
    {
        yield return BinaryWriterAction(w => w.Write(true));
        yield return BinaryWriterAction(w => w.Write(false));
        yield return BinaryWriterAction(w => w.Write(byte.MinValue));
        yield return BinaryWriterAction(w => w.Write(byte.MaxValue));
        yield return BinaryWriterAction(w => w.Write(short.MinValue));
        yield return BinaryWriterAction(w => w.Write(short.MaxValue));
        yield return BinaryWriterAction(w => w.Write(int.MinValue));
        yield return BinaryWriterAction(w => w.Write(int.MaxValue));
        yield return BinaryWriterAction(w => w.Write(long.MinValue));
        yield return BinaryWriterAction(w => w.Write(long.MaxValue));
        yield return BinaryWriterAction(w => w.Write(decimal.MinValue));
        yield return BinaryWriterAction(w => w.Write(decimal.MaxValue));
        yield return BinaryWriterAction(w => w.Write(double.MinValue));
        yield return BinaryWriterAction(w => w.Write(double.MaxValue));
        yield return BinaryWriterAction(w => w.Write(double.PositiveInfinity));
        yield return BinaryWriterAction(w => w.Write(double.NaN));
        yield return BinaryWriterAction(w => w.Write(new byte[] { 1, 2, 3, 4, 5, 6, 7 }));

        // A large buffer (255 * 255 bytes), so that both running sums wrap around the modulus many times.
        yield return BinaryWriterAction(w => w.Write(Enumerable.Range(0, 255).SelectMany(x => Enumerable.Range(0, 255)).Select(x => (byte)x).ToArray()));
    }

    /// <summary>
    /// Turns an action on a <see cref="BinaryWriter"/> into an action on a stream,
    /// leaving the stream open after the writer is disposed.
    /// </summary>
    private static Action<Stream> BinaryWriterAction(Action<BinaryWriter> action)
    {
        return stream =>
        {
            using var writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: true);
            action(writer);
        };
    }

    /// <summary>
    /// Runs the action against a plain <see cref="MemoryStream"/> and calculates
    /// the Adler-32 checksum of the bytes it produced.
    /// </summary>
    private static uint ReferenceCalculation(Action<Stream> action)
    {
        using var stream = new MemoryStream();
        action(stream);
        stream.Position = 0;
        return CalculateAdler32Checksum(stream);
    }

    /// <summary>
    /// Straightforward Adler-32 implementation used as the reference:
    /// reads the stream from its current position to the end.
    /// </summary>
    private static uint CalculateAdler32Checksum(Stream stream)
    {
        uint s1 = 1;
        uint s2 = 0;
        const uint bigPrime = 0xFFF1;
        const int bufferSize = 2048;
        var buffer = new byte[bufferSize];
        int bytesRead;

        // Read returns 0 only at the end of the stream; a short read alone does not mean the end.
        while ((bytesRead = stream.Read(buffer, 0, bufferSize)) > 0)
        {
            for (var i = 0; i < bytesRead; i++)
            {
                s1 = (s1 + buffer[i]) % bigPrime;
                s2 = (s2 + s1) % bigPrime;
            }
        }

        return (s2 << 16) | s1;
    }
}

View File

@ -1,6 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFrameworks>net8.0;net472</TargetFrameworks> <TargetFrameworks>net8.0;net472</TargetFrameworks>
<LangVersion>10.0</LangVersion>
<IsPackable>false</IsPackable> <IsPackable>false</IsPackable>
</PropertyGroup> </PropertyGroup>
<PropertyGroup> <PropertyGroup>

View File

@ -1,7 +1,7 @@
 
Microsoft Visual Studio Solution File, Format Version 12.00 Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16 # Visual Studio Version 17
VisualStudioVersion = 16.0.28621.142 VisualStudioVersion = 17.10.34928.147
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MatFileHandler", "MatFileHandler\MatFileHandler.csproj", "{C0CD11D3-016A-4FCD-AF0B-D745F79F3749}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MatFileHandler", "MatFileHandler\MatFileHandler.csproj", "{C0CD11D3-016A-4FCD-AF0B-D745F79F3749}"
EndProject EndProject
@ -14,6 +14,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
README.md = README.md README.md = README.md
EndProjectSection EndProjectSection
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmarks", "Benchmarks\Benchmarks.csproj", "{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU
@ -28,6 +30,10 @@ Global
{4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Debug|Any CPU.Build.0 = Debug|Any CPU {4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Release|Any CPU.ActiveCfg = Release|Any CPU {4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Release|Any CPU.Build.0 = Release|Any CPU {4E09DE2D-13D2-458C-BBD2-BE65AAE30CC7}.Release|Any CPU.Build.0 = Release|Any CPU
{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CDCF50F8-AA1C-460A-BF54-8D5A4386813E}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@ -0,0 +1,91 @@
// Copyright 2017-2018 Alexander Luzgarev
using System;
using System.IO;
namespace MatFileHandler;
/// <summary>
/// A write-only stream that calculates Adler32 checksum of everything
/// written to it before passing to another stream.
/// </summary>
/// <remarks>
/// Reading, seeking and length queries are not supported.
/// This class does not override <c>Dispose</c>, so the wrapped stream is left
/// for its owner to dispose.
/// </remarks>
internal class ChecksumCalculatingStream : Stream
{
    // Modulus of both running sums (65521).
    private const uint BigPrime = 0xFFF1;

    private readonly Stream _stream;

    // Running sums of the checksum; the initial values correspond to "nothing written yet".
    private uint _s1 = 1;
    private uint _s2 = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="ChecksumCalculatingStream"/> class.
    /// </summary>
    /// <param name="stream">Wrapped stream.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public ChecksumCalculatingStream(Stream stream)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
    }

    /// <inheritdoc />
    public override bool CanRead => false;

    /// <inheritdoc />
    public override bool CanSeek => false;

    /// <inheritdoc />
    public override bool CanWrite => true;

    /// <inheritdoc />
    public override long Length => throw new NotSupportedException();

    /// <inheritdoc />
    public override long Position
    {
        get => throw new NotSupportedException();
        set => throw new NotSupportedException();
    }

    /// <inheritdoc />
    public override void Flush()
    {
        _stream.Flush();
    }

    /// <inheritdoc />
    public override int Read(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }

    /// <inheritdoc />
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    /// <inheritdoc />
    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    /// <inheritdoc />
    public override void Write(byte[] buffer, int offset, int count)
    {
        // Validate up front so that a bad call cannot leave the checksum half-updated.
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }

        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }

        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }

        if (buffer.Length - offset < count)
        {
            throw new ArgumentException("Offset and count exceed the length of the buffer.");
        }

        // Work on locals inside the loop and store the sums back once at the end.
        var s1 = _s1;
        var s2 = _s2;
        var end = offset + count;
        for (var i = offset; i < end; i++)
        {
            s1 = (s1 + buffer[i]) % BigPrime;
            s2 = (s2 + s1) % BigPrime;
        }

        _s1 = s1;
        _s2 = s2;
        _stream.Write(buffer, offset, count);
    }

    /// <inheritdoc />
    public override void WriteByte(byte value)
    {
        // Overridden because the base implementation allocates a one-byte array per call.
        _s1 = (_s1 + value) % BigPrime;
        _s2 = (_s2 + _s1) % BigPrime;
        _stream.WriteByte(value);
    }

    /// <summary>
    /// Calculate the checksum of everything written to the stream so far.
    /// </summary>
    /// <returns>Checksum of everything written to the stream so far.</returns>
    public uint GetCrc()
    {
        return (_s2 << 16) | _s1;
    }
}

View File

@ -44,4 +44,7 @@
<ItemGroup> <ItemGroup>
<None Include="..\LICENSE.md" Pack="true" PackagePath=""/> <None Include="..\LICENSE.md" Pack="true" PackagePath=""/>
</ItemGroup> </ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="MatFileHandler.Tests" />
</ItemGroup>
</Project> </Project>

View File

@ -67,29 +67,6 @@ namespace MatFileHandler
} }
} }
private static uint CalculateAdler32Checksum(Stream stream)
{
uint s1 = 1;
uint s2 = 0;
const uint bigPrime = 0xFFF1;
const int bufferSize = 2048;
var buffer = new byte[bufferSize];
while (true)
{
var bytesRead = stream.Read(buffer, 0, bufferSize);
for (var i = 0; i < bytesRead; i++)
{
s1 = (s1 + buffer[i]) % bigPrime;
s2 = (s2 + s1) % bigPrime;
}
if (bytesRead < bufferSize)
{
break;
}
}
return (s2 << 16) | s1;
}
private void WriteHeader(BinaryWriter writer, Header header) private void WriteHeader(BinaryWriter writer, Header header)
{ {
writer.Write(Encoding.UTF8.GetBytes(header.Text)); writer.Write(Encoding.UTF8.GetBytes(header.Text));
@ -637,31 +614,28 @@ namespace MatFileHandler
/// <summary>
/// Writes a variable as a compressed element: a <see cref="DataType.MiCompressed"/> tag,
/// a two-byte header, the deflate-compressed variable data and a four-byte checksum.
/// </summary>
/// <param name="writer">Writer for the output stream.</param>
/// <param name="variable">Variable to write.</param>
private void WriteCompressedVariable(BinaryWriter writer, IVariable variable)
{
    var output = writer.BaseStream;
    if (!output.CanSeek)
    {
        // The element length is known only after compression, and patching the tag
        // requires seeking back. For forward-only streams, assemble the element
        // in memory first (a MemoryStream is seekable) and copy it over.
        using var buffer = new MemoryStream();
        using (var bufferWriter = new BinaryWriter(buffer, Encoding.UTF8, leaveOpen: true))
        {
            WriteCompressedVariable(bufferWriter, variable);
        }

        buffer.Position = 0;
        buffer.CopyTo(output);
        return;
    }

    // Write a placeholder tag with zero length; it is overwritten below.
    var tagPosition = output.Position;
    WriteTag(writer, new Tag(DataType.MiCompressed, 0));

    // Two-byte header preceding the deflate data (zlib framing, see RFC 1950).
    writer.Write((byte)0x78);
    writer.Write((byte)0x9c);

    uint crc;
    var before = output.Position;
    using (var compressionStream = new DeflateStream(output, CompressionMode.Compress, leaveOpen: true))
    {
        // The checksum is taken over the uncompressed bytes on their way into the compressor.
        using var checksumStream = new ChecksumCalculatingStream(compressionStream);
        using var internalWriter = new BinaryWriter(checksumStream, Encoding.UTF8, leaveOpen: true);
        WriteVariable(internalWriter, variable);
        crc = checksumStream.GetCrc();
    }

    // Disposing the compression stream above has flushed all compressed data to the output.
    var after = output.Position;

    // 6 = 2 header bytes + 4 checksum bytes; checked, so that an element that does not fit
    // into the tag's int length fails loudly instead of writing a wrong length.
    var compressedLength = checked((int)(after - before + 6));

    // Byte order of the checksum is reversed (most significant byte first on little-endian hosts).
    writer.Write(BitConverter.GetBytes(crc).Reverse().ToArray());
    var endPosition = output.Position;

    // Go back and replace the placeholder tag with the real length.
    output.Position = tagPosition;
    WriteTag(writer, new Tag(DataType.MiCompressed, compressedLength));

    // Return to just after this element (not to the end of the stream,
    // which may lie further on if the stream already held data).
    output.Position = endPosition;
}
} }
} }