From fd9a30147bc4e812dff9f46c18fa569eaffd32cc Mon Sep 17 00:00:00 2001 From: MarkPflug Date: Fri, 8 Sep 2023 09:48:21 -0700 Subject: [PATCH 1/2] Add async for ExcelDataReader --- docs/Async.md | 29 ++ .../ExcelDataReaderTests.cs | 60 ++++ .../Sylvan.Data.Excel.Tests.csproj | 5 + source/Sylvan.Data.Excel.sln | 1 + .../Common/PooledMemoryStream.cs | 282 ++++++++++++++++++ source/Sylvan.Data.Excel/ExcelDataReader.cs | 163 +++++++--- .../Sylvan.Data.Excel.csproj | 4 +- .../Xls/XlsWorkbookReader+RecordReader.cs | 35 ++- .../Xls/XlsWorkbookReader.cs | 93 +++--- .../Xlsb/XlsbWorkbookReader.cs | 8 +- .../Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs | 1 + .../Xlsx/XlsxWorkbookReader.cs | 11 +- 12 files changed, 561 insertions(+), 131 deletions(-) create mode 100644 docs/Async.md create mode 100644 source/Sylvan.Data.Excel/Common/PooledMemoryStream.cs diff --git a/docs/Async.md b/docs/Async.md new file mode 100644 index 0000000..54cc1ec --- /dev/null +++ b/docs/Async.md @@ -0,0 +1,29 @@ +# Async Support + +ExcelDataReader and ExcelDataWriter both support "async" operations. +In order to ensure proper async behavior, you must use the `CreateAsync` +method to create instances. In this mode, the entire file will be +asynchronously buffered into memory. + +Reading: +``` +// this line will buffer the entire file into memory. +await using var edr = await ExcelDataReader.CreateAsync("jumbo.xlsx"); + +while(await edr.ReadAsync()) +{ + // ... +} + +``` + +Writing: +``` +await using var edw = ExcelDataWriter.CreateAsync("jumbo.xlsx"); + +edw.WriteAsync(myDataReader, "MyData"); +edw.WriteAsync(myOtherDataReader, "MoreData"); + +// when the ExcelDataWriter is asynchronously disposed +// the buffered file is asynchronously written to the output. 
+``` \ No newline at end of file diff --git a/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs b/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs index ae44ed0..8ccd4e3 100644 --- a/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs +++ b/source/Sylvan.Data.Excel.Tests/ExcelDataReaderTests.cs @@ -5,6 +5,7 @@ using System.IO; using System.Runtime.CompilerServices; using System.Text; +using System.Threading.Tasks; using Xunit; namespace Sylvan.Data.Excel; @@ -1253,6 +1254,65 @@ public void Date1900() Assert.Equal(45678d, edr.GetDouble(0)); Assert.Equal(new DateTime(2025, 1, 21), edr.GetDateTime(0)); } + +#if ASYNC + + [Fact] + public async Task BasicAsync() + { + var name = GetFile("Big"); + await using var edr = await ExcelDataReader.CreateAsync(name); + while (await edr.ReadAsync()) + { + for (int i = 0; i < edr.RowFieldCount; i++) + { + edr.GetValue(i); + } + } + } + + [Fact] + public async Task StreamAsync() + { + var name = GetFile("Big"); + var stream = File.OpenRead(name); + + var testStream = new TestStream(stream); + + await using var edr = await ExcelDataReader.CreateAsync(testStream, this.WorkbookType); + while (await edr.ReadAsync()) + { + for (int i = 0; i < edr.RowFieldCount; i++) + { + edr.GetValue(i); + } + } + + // The stream should still be open, because we own it. + Assert.False(testStream.IsClosed); + } + + [Fact] + public async Task OwnedStreamAsync() + { + var name = GetFile("Big"); + var stream = File.OpenRead(name); + + var testStream = new TestStream(stream); + var opts = new ExcelDataReaderOptions { OwnsStream = true }; + await using var edr = await ExcelDataReader.CreateAsync(testStream, this.WorkbookType, opts); + while (await edr.ReadAsync()) + { + for (int i = 0; i < edr.RowFieldCount; i++) + { + edr.GetValue(i); + } + } + // The stream should have been closed for us, due to OwnsStream = true. 
+ Assert.True(testStream.IsClosed); + } + +#endif } public sealed class XlsTests : XlsxTests diff --git a/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj b/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj index 8d0d52f..4f3a65b 100644 --- a/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj +++ b/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj @@ -7,6 +7,10 @@ true + + $(DefineConstants);ASYNC + + all @@ -19,6 +23,7 @@ + diff --git a/source/Sylvan.Data.Excel.sln b/source/Sylvan.Data.Excel.sln index 5bc8656..b87096d 100644 --- a/source/Sylvan.Data.Excel.sln +++ b/source/Sylvan.Data.Excel.sln @@ -9,6 +9,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sylvan.Data.Excel.Tests", " EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Documentation", "Documentation", "{515E9519-9779-41B9-AEAE-F17DA70EFB1B}" ProjectSection(SolutionItems) = preProject + ..\docs\Async.md = ..\docs\Async.md ..\license.txt = ..\license.txt ..\readme.md = ..\readme.md ..\docs\Reference.md = ..\docs\Reference.md diff --git a/source/Sylvan.Data.Excel/Common/PooledMemoryStream.cs b/source/Sylvan.Data.Excel/Common/PooledMemoryStream.cs new file mode 100644 index 0000000..c2a1a46 --- /dev/null +++ b/source/Sylvan.Data.Excel/Common/PooledMemoryStream.cs @@ -0,0 +1,282 @@ +#if ASYNC +// this type is only needed for buffering Excel data for async operation. + +using System; +using System.Buffers; +using System.Diagnostics; +using System.IO; +using System.Threading; +using System.Threading.Tasks; + +namespace Sylvan.IO; + +/// +/// A memory-backed implementation using pooled buffers. +/// +/// +/// This class uses pooled buffers to reduce allocations, and memory clearing +/// that are present with . 
+/// +sealed class PooledMemoryStream : Stream +{ + static readonly ArrayPool DefaultPool = ArrayPool.Shared; + + const int DefaultBlockShift = 16; // default to 64k blocks + const int InitialBlockCount = 8; + + readonly ArrayPool bufferPool; + readonly int blockShift; + readonly int blockSize; + readonly int blockMask; + readonly bool clearOnReturn; + + long length; + long position; + + byte[]?[] blocks; + + /// + /// Creates a PooledMemoryStream. + /// + public PooledMemoryStream() : this(DefaultPool, DefaultBlockShift, false) + { + } + + /// + /// Creates a PooledMemoryStream. + /// + /// The to use. + /// The size of the buffer to use expressed 1 << blockShift. (Valid values 6 - 24) + /// A boolean indicating whether to clear the buffers after use. + public PooledMemoryStream(ArrayPool bufferPool, int blockShift = DefaultBlockShift, bool clearOnReturn = false) + { + if (blockShift < 6 || blockShift > 24) // 64b - 16MB + throw new ArgumentOutOfRangeException(nameof(blockShift)); + + this.bufferPool = bufferPool; + this.blocks = new byte[]?[InitialBlockCount]; + this.blockShift = blockShift; + this.blockSize = 1 << blockShift; + this.blockMask = blockSize - 1; + this.clearOnReturn = clearOnReturn; + } + + /// + public override bool CanRead => true; + /// + public override bool CanSeek => true; + /// + public override bool CanWrite => true; + /// + public override long Length => length; + /// + public override long Position + { + get + { + return position; + } + set + { + this.Seek(value, SeekOrigin.Begin); + } + } + + /// + public override void Flush() + { + } + + /// + public override int Read(byte[] buffer, int offset, int count) + { + if (buffer == null) throw new ArgumentNullException(nameof(buffer)); + if (count < 0) throw new ArgumentOutOfRangeException(nameof(count)); + if (offset + count > buffer.Length) throw new ArgumentOutOfRangeException(nameof(offset)); + + var avail = this.length - this.position; + var c = (int)(avail < count ? 
avail : count); + var len = c; + var pos = this.position; + while (c > 0) + { + var blockIdx = pos >> blockShift; + var curBlock = blocks[blockIdx]; + var blockOffset = (int)(pos & blockMask); + var blockRem = blockSize - blockOffset; + Debug.Assert(blockRem >= 0); + var cl = blockRem < c ? blockRem : c; + if (curBlock == null) + { + Array.Clear(buffer, offset, cl); + } + else + { + Buffer.BlockCopy(curBlock, blockOffset, buffer, offset, cl); + } + + pos += cl; + offset += cl; + c -= cl; + } + + this.position = pos; + return len; + } + + /// + public override long Seek(long offset, SeekOrigin origin) + { + long pos = 0; + switch (origin) + { + case SeekOrigin.Begin: + pos = offset; + break; + case SeekOrigin.Current: + pos = this.position + offset; + break; + case SeekOrigin.End: + pos = this.length + offset; + break; + } + if (pos < 0 || pos > this.length) + throw new ArgumentOutOfRangeException(nameof(offset)); + this.position = pos; + return pos; + } + + /// + public override void SetLength(long value) + { + if (value < 0) throw new ArgumentOutOfRangeException(nameof(value)); + + if (value < this.length) + { + long blocks = length >> blockShift; + long newBlocks = value >> blockShift; + + // if the stream shrunk, return any unused blocks + for (long i = newBlocks; i <= blocks && i < this.blocks.Length; i++) + { + var buffer = this.blocks[i]; + if (buffer != null) + { + this.blocks[i] = null; + this.bufferPool.Return(buffer, clearOnReturn); + } + this.length = value; + } + } + + this.length = value; + } + + /// + public override void Write(byte[] buffer, int offset, int count) + { + if (buffer == null) + throw new ArgumentNullException(nameof(buffer)); + if (offset >= buffer.Length) + throw new ArgumentOutOfRangeException(nameof(offset)); + if (count < 0 || offset + count > buffer.Length) + throw new ArgumentOutOfRangeException(nameof(count)); + + var shift = blockShift; + var blockSize = this.blockSize; + var blockMask = blockSize - 1; + + var endLength = 
this.position + count; + var reqBlockCount = (endLength + blockMask) >> shift; + + var blocks = this.blocks; + if (reqBlockCount > blocks.Length) + { + var newBlockCount = blocks.Length; + while (newBlockCount < reqBlockCount) + { + newBlockCount <<= 1; + } + + var newBuffers = new byte[]?[newBlockCount]; + Array.Copy(blocks, 0, newBuffers, 0, blocks.Length); + this.blocks = newBuffers; + } + + blocks = this.blocks; + var pos = this.position; + while (count > 0) + { + var blockIdx = pos >> shift; + var curBlock = blocks[blockIdx]; + if (curBlock == null) + { + curBlock = bufferPool.Rent(this.blockSize); + blocks[blockIdx] = curBlock; + } + var blockOffset = (int)(pos & blockMask); + var blockRem = blockSize - blockOffset; + Debug.Assert(blockRem >= 0); + var c = blockRem < count ? blockRem : count; + Buffer.BlockCopy(buffer, offset, curBlock, blockOffset, c); + count -= c; + pos = pos + c; + offset += c; + } + this.position = pos; + if (this.position > this.length) + this.length = this.position; + } + + /// + public override async Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + if (destination == null) throw new ArgumentNullException(nameof(destination)); + + while (position < length) + { + cancellationToken.ThrowIfCancellationRequested(); + var rem = length - position; + var blockIdx = position >> blockShift; + var block = this.blocks[blockIdx]!; + var blockOffset = (int)(position & blockMask); + var blockCount = blockSize - blockOffset; + var blockLen = rem < blockCount ? 
(int)rem : blockCount; + await destination.WriteAsync(block.AsMemory().Slice(blockOffset, blockLen), cancellationToken).ConfigureAwait(false); + position += blockLen; + } + } + + /// + public override void CopyTo(Stream destination, int bufferSize) + { + if (destination == null) throw new ArgumentNullException(nameof(destination)); + + while (position < length) + { + var rem = length - position; + var blockIdx = position >> blockShift; + var block = this.blocks[blockIdx]!; + var blockOffset = (int)(position & blockMask); + var blockCount = blockSize - blockOffset; + var blockLen = rem < blockCount ? (int)rem : blockCount; + destination.Write(block, blockOffset, blockLen); + position += blockLen; + } + } + + /// + protected override void Dispose(bool disposing) + { + base.Dispose(disposing); + for (int i = 0; i < this.blocks.Length; i++) + { + var block = this.blocks[i]; + if (block != null) + this.bufferPool.Return(block, clearOnReturn); + this.blocks[i] = null; + } + } +} + +#endif \ No newline at end of file diff --git a/source/Sylvan.Data.Excel/ExcelDataReader.cs b/source/Sylvan.Data.Excel/ExcelDataReader.cs index 9de0ac7..fdc17f9 100644 --- a/source/Sylvan.Data.Excel/ExcelDataReader.cs +++ b/source/Sylvan.Data.Excel/ExcelDataReader.cs @@ -3,6 +3,7 @@ using System.Collections; using System.Collections.Generic; using System.Collections.ObjectModel; +using System.ComponentModel.Design; using System.Data; using System.Data.Common; using System.Globalization; @@ -27,7 +28,9 @@ public abstract partial class ExcelDataReader : DbDataReader, IDisposable, IDbCo int fieldCount; bool isClosed; Stream stream; - +#pragma warning disable + bool isAsync; // currently unused, but intend to use it to enforce async access patterns. 
+#pragma warning restore private protected IExcelSchemaProvider schema; private protected State state; private protected ExcelColumn[] columnSchema; @@ -76,6 +79,7 @@ public sealed override DataTable GetSchemaTable() private protected ExcelDataReader(Stream stream, ExcelDataReaderOptions options) { + this.isAsync = false; this.stream = stream; this.schema = options.Schema; this.errorAsNull = options.GetErrorAsNull; @@ -97,6 +101,92 @@ private protected ExcelDataReader(Stream stream, ExcelDataReaderOptions options) this.ownsStream = options.OwnsStream; } +#if ASYNC + + /// + /// Asynchronously creates a new ExcelDataReader. + /// + /// The name of the file to open. + /// An optional ExcelDataReaderOptions instance. + /// A CancellationToken. + /// The ExcelDataReader. + /// If the filename refers to a file of an unknown type. + public static async Task CreateAsync(string filename, ExcelDataReaderOptions? options = null, CancellationToken cancel = default) + { + var type = GetWorkbookType(filename); + if (type == ExcelWorkbookType.Unknown) + throw new ArgumentException(null, nameof(filename)); + + var s = File.OpenRead(filename); + try + { + return await CreateAsync(s, type, options, cancel).ConfigureAwait(false); + } + finally + { + if (s != null) + { + var t = s.DisposeAsync(); + await t.AsTask().ConfigureAwait(false); + } + } + } + + /// + /// Creates a new ExcelDataReader instance. + /// + /// A stream containing the Excel file contents. + /// The type of file represented by the stream. + /// An optional ExcelDataReaderOptions instance. + /// + /// The ExcelDataReader. + public static async Task CreateAsync(Stream stream, ExcelWorkbookType fileType, ExcelDataReaderOptions? 
options = null, CancellationToken cancel = default) + { + options ??= ExcelDataReaderOptions.Default; + + var ms = new Sylvan.IO.PooledMemoryStream(); + await stream.CopyToAsync(ms, cancel).ConfigureAwait(false); + ms.Seek(0, SeekOrigin.Begin); + try + { + ExcelDataReader reader; + switch (fileType) + { + case ExcelWorkbookType.Excel: + reader = new Xls.XlsWorkbookReader(ms, options); + break; + case ExcelWorkbookType.ExcelXml: + reader = new XlsxWorkbookReader(ms, options); + break; + case ExcelWorkbookType.ExcelBinary: + reader = new Xlsb.XlsbWorkbookReader(ms, options); + break; + default: + throw new ArgumentException(nameof(fileType)); + } + // In async mode, the reader always owns the memory stream. + // This causes disposal to dispose the memorystream, and return any pooled buffers. + reader.ownsStream = true; + reader.isAsync = true; + return reader; + } + catch + { + ms?.Dispose(); + throw; + } + finally + { + if (options.OwnsStream) + { + var t = stream.DisposeAsync(); + await t.AsTask().ConfigureAwait(false); + } + } + } + +#endif + /// /// Creates a new ExcelDataReader. /// @@ -110,12 +200,11 @@ public static ExcelDataReader Create(string filename, ExcelDataReaderOptions? op if (type == ExcelWorkbookType.Unknown) throw new ArgumentException(null, nameof(filename)); - var s = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read, 1); + var s = File.OpenRead(filename); try { var reader = Create(s, type, options); reader.ownsStream = true; - reader.stream = s; return reader; } catch (Exception) @@ -125,6 +214,30 @@ public static ExcelDataReader Create(string filename, ExcelDataReaderOptions? op } } + /// + /// Creates a new ExcelDataReader instance. + /// + /// A stream containing the Excel file contents. + /// The type of file represented by the stream. + /// An optional ExcelDataReaderOptions instance. + /// The ExcelDataReader. + public static ExcelDataReader Create(Stream stream, ExcelWorkbookType fileType, ExcelDataReaderOptions? 
options = null) + { + options = options ?? ExcelDataReaderOptions.Default; + + switch (fileType) + { + case ExcelWorkbookType.Excel: + return new Xls.XlsWorkbookReader(stream, options); + case ExcelWorkbookType.ExcelXml: + return new XlsxWorkbookReader(stream, options); + case ExcelWorkbookType.ExcelBinary: + return new Xlsb.XlsbWorkbookReader(stream, options); + default: + throw new ArgumentException(nameof(fileType)); + } + } + /// public override bool IsClosed => isClosed; @@ -150,30 +263,6 @@ public override void Close() /// public abstract int MaxFieldCount { get; } - /// - /// Creates a new ExcelDataReader instance. - /// - /// A stream containing the Excel file contents. - /// The type of file represented by the stream. - /// An optional ExcelDataReaderOptions instance. - /// The ExcelDataReader. - public static ExcelDataReader Create(Stream stream, ExcelWorkbookType fileType, ExcelDataReaderOptions? options = null) - { - options = options ?? ExcelDataReaderOptions.Default; - - switch (fileType) - { - case ExcelWorkbookType.Excel: - return Xls.XlsWorkbookReader.CreateAsync(stream, options).GetAwaiter().GetResult(); - case ExcelWorkbookType.ExcelXml: - return new XlsxWorkbookReader(stream, options); - case ExcelWorkbookType.ExcelBinary: - return new Xlsb.XlsbWorkbookReader(stream, options); - default: - throw new ArgumentException(nameof(fileType)); - } - } - /// /// Gets the type of an Excel workbook from the file name. /// @@ -188,20 +277,6 @@ public static ExcelWorkbookType GetWorkbookType(string filename) /// The name of the worksheet to open. /// True if the sheet was opened, otherwise false. public bool TryOpenWorksheet(string name) - { -#pragma warning disable // disable obsolete warning for now. - return TryOpenWorksheetAsync(name).GetAwaiter().GetResult(); -#pragma warning enable - } - - /// - /// Tries to open a worksheet. - /// - /// The name of the worksheet to open. - /// A cancellation token for the async operation. 
- /// True if the sheet was opened, otherwise false. - [Obsolete("TryOpenWorksheetAsync will be removed in a future version. Use TryOpenWorksheet instead.")] - public Task TryOpenWorksheetAsync(string name, CancellationToken cancel = default) { var sheetIdx = -1; for (int i = 0; i < this.sheetInfos.Length; i++) @@ -214,12 +289,12 @@ public Task TryOpenWorksheetAsync(string name, CancellationToken cancel = } if (sheetIdx == -1) { - return Task.FromResult(false); + return false; } - return OpenWorksheetAsync(sheetIdx, cancel); + return OpenWorksheet(sheetIdx); } - private protected abstract Task OpenWorksheetAsync(int sheetIdx, CancellationToken cancel); + private protected abstract bool OpenWorksheet(int sheetIdx); /// /// Gets the names of the worksheets in the workbook. diff --git a/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj b/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj index 0ec0757..1bfa70f 100644 --- a/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj +++ b/source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj @@ -12,11 +12,11 @@ - $(DefineConstants);SPAN + $(DefineConstants);SPAN;ASYNC - $(DefineConstants);SPAN;DATE_ONLY + $(DefineConstants);SPAN;ASYNC;DATE_ONLY diff --git a/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs b/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs index 64b9dc3..0e04a8c 100644 --- a/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs +++ b/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs @@ -3,7 +3,6 @@ using System.IO; using System.Runtime.CompilerServices; using System.Text; -using System.Threading.Tasks; using static Sylvan.Data.Excel.Ole2Package; namespace Sylvan.Data.Excel.Xls; @@ -45,7 +44,7 @@ public RecordReader(Ole2Stream stream) this.strBuffer = Array.Empty(); } - async Task FillBufferAsync(int required) + bool FillBuffer(int required) { var len = bufferLen - recordOff; @@ -65,7 +64,7 @@ async Task FillBufferAsync(int required) while (c < required) { - var l = 
await stream.ReadAsync(buffer, len, BufferSize - len, default).ConfigureAwait(false); + var l = stream.Read(buffer, len, BufferSize - len); c += l; if (l == 0) { @@ -119,11 +118,11 @@ public int ReadInt32() return ReadByte() | ReadByte() << 8 | ReadByte() << 16 | ReadByte() << 24; } - public async Task ReadString16() + public string ReadString16() { if (bufferPos >= recordOff + recordLen) { - var next = await NextRecordAsync().ConfigureAwait(false); + var next = NextRecord(); if (!next || Type != RecordType.Continue) throw new InvalidDataException(); } @@ -144,7 +143,7 @@ public async Task ReadString16() if (asian) asianCount = ReadInt32(); - var str = await ReadStringBufferAsync(len, compressed).ConfigureAwait(false); + var str = ReadStringBuffer(len, compressed); var remain = richCount * 4 + asianCount; @@ -157,7 +156,7 @@ public async Task ReadString16() Assert(); if (remain > 0) { - var next = await NextRecordAsync().ConfigureAwait(false); + var next = NextRecord(); if (!next || Type != RecordType.Continue) throw new InvalidDataException(); } @@ -168,7 +167,7 @@ public async Task ReadString16() static readonly Encoding Encoding1252 = Encoding.GetEncoding(1252); - internal async Task ReadStringBufferAsync(int charCount, bool compressed) + internal string ReadStringBuffer(int charCount, bool compressed) { var strLen = charCount; // stores our position in the string we are assembling. 
@@ -216,7 +215,7 @@ internal async Task ReadStringBufferAsync(int charCount, bool compressed if (charCount > 0) { - var next = await NextRecordAsync().ConfigureAwait(false); + var next = NextRecord(); if (!next || Type != RecordType.Continue) throw new InvalidDataException(); @@ -232,7 +231,7 @@ internal async Task ReadStringBufferAsync(int charCount, bool compressed return new string(strBuffer, 0, strLen); } - public async Task ReadByteString(int lenSize) + public string ReadByteString(int lenSize) { int len; if (lenSize == 1) @@ -240,12 +239,12 @@ public async Task ReadByteString(int lenSize) else len = ReadInt16(); - await ReadStringBufferAsync(len, true).ConfigureAwait(false); + ReadStringBuffer(len, true); var str = new string(strBuffer, 0, len); return str; } - public async Task ReadString8() + public string ReadString8() { int len = ReadByte(); byte options = ReadByte(); @@ -262,7 +261,7 @@ public async Task ReadString8() if (asian) asianCount = ReadInt32(); - var str = await ReadStringBufferAsync(len, compressed).ConfigureAwait(false); + var str = ReadStringBuffer(len, compressed); for (int i = 0; i < richCount; i++) { @@ -277,21 +276,21 @@ public async Task ReadString8() return str; } - public async Task ReadStringAsync(int length, bool compressed) + public string ReadString(int length, bool compressed) { - var str = await ReadStringBufferAsync(length, compressed).ConfigureAwait(false); + var str = ReadStringBuffer(length, compressed); return str; } // reads the next BIFF record. Ensuring the entire // record bytes are in the working buffer. 
- public async Task NextRecordAsync() + public bool NextRecord() { bufferPos = recordOff + recordLen; if (bufferPos + 4 >= bufferLen) { - if (!await FillBufferAsync(4).ConfigureAwait(false)) + if (!FillBuffer(4)) { return false; } @@ -311,7 +310,7 @@ public async Task NextRecordAsync() var req = (recordOff + recordLen) - bufferLen; Debug.Assert(req >= 1); - if (!await FillBufferAsync(req).ConfigureAwait(false)) + if (!FillBuffer(req)) { return false; } diff --git a/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader.cs b/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader.cs index d4fe0b3..ebe3a22 100644 --- a/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader.cs +++ b/source/Sylvan.Data.Excel/Xls/XlsWorkbookReader.cs @@ -1,8 +1,6 @@ using System; using System.Collections.Generic; using System.IO; -using System.Threading; -using System.Threading.Tasks; namespace Sylvan.Data.Excel.Xls; @@ -32,15 +30,7 @@ public XlsSheetInfo(string name, int offset, bool hidden) : base(name, hidden) int curFieldCount = 0; int pendingRow = -1; - internal static async Task CreateAsync(Stream iStream, ExcelDataReaderOptions options) - { - var reader = new XlsWorkbookReader(iStream, options); - await reader.ReadHeaderAsync().ConfigureAwait(false); - await reader.NextResultAsync().ConfigureAwait(false); - return reader; - } - - private XlsWorkbookReader(Stream stream, ExcelDataReaderOptions options) : base(stream, options) + internal XlsWorkbookReader(Stream stream, ExcelDataReaderOptions options) : base(stream, options) { var pkg = new Ole2Package(stream); var part = pkg.GetEntry("Workbook\0"); @@ -50,25 +40,26 @@ private XlsWorkbookReader(Stream stream, ExcelDataReaderOptions options) : base( this.reader = new RecordReader(ps); this.fieldInfos = new FieldInfo[16]; - + this.ReadHeader(); + this.NextResult(); } public override ExcelWorkbookType WorkbookType => ExcelWorkbookType.Excel; public override int RowNumber => rowNumber; - private protected override async Task OpenWorksheetAsync(int sheetIdx, 
CancellationToken cancel) + private protected override bool OpenWorksheet(int sheetIdx) { var info = (XlsSheetInfo)this.sheetInfos[sheetIdx]; this.rowNumber = 0; this.pendingRow = -1; reader.SetPosition(info.Offset); - await InitSheet(cancel).ConfigureAwait(false); + InitSheet(); this.sheetIdx = sheetIdx; return true; } - public override async Task NextResultAsync(CancellationToken cancel) + public override bool NextResult() { sheetIdx++; @@ -76,19 +67,14 @@ public override async Task NextResultAsync(CancellationToken cancel) { if (this.readHiddenSheets || this.sheetInfos[sheetIdx].Hidden == false) { - await OpenWorksheetAsync(sheetIdx, cancel).ConfigureAwait(false); + OpenWorksheet(sheetIdx); return true; } } return false; } - public override bool NextResult() - { - return NextResultAsync(default).GetAwaiter().GetResult(); - } - - public override async Task ReadAsync(CancellationToken cancel) + public override bool Read() { rowNumber++; if (this.rowIndex >= rowCount) @@ -105,12 +91,7 @@ public override async Task ReadAsync(CancellationToken cancel) } rowIndex++; - return await NextRow().ConfigureAwait(false); - } - - public override bool Read() - { - return ReadAsync(CancellationToken.None).GetAwaiter().GetResult(); + return NextRow(); } public override int MaxFieldCount => 256; @@ -127,9 +108,9 @@ BOFType ReadBOF() return (BOFType)type; } - async Task ReadHeaderAsync() + void ReadHeader() { - await reader.NextRecordAsync().ConfigureAwait(false); + reader.NextRecord(); if (reader.Type != RecordType.BOF) throw new InvalidDataException();//"Expected BOF record" @@ -140,16 +121,16 @@ async Task ReadHeaderAsync() var sheets = new List(); var xfs = new List(); - while (await reader.NextRecordAsync().ConfigureAwait(false)) + while (reader.NextRecord()) { var recordType = reader.Type; switch (recordType) { case RecordType.Sst: - await LoadSharedStringTable().ConfigureAwait(false); + LoadSharedStringTable(); break; case RecordType.Sheet: - sheets.Add(await 
LoadSheetRecord().ConfigureAwait(false)); + sheets.Add(LoadSheetRecord()); break; case RecordType.Style: ParseStyle(); @@ -158,7 +139,7 @@ async Task ReadHeaderAsync() xfs.Add(ParseXF()); break; case RecordType.Format: - await ParseFormat().ConfigureAwait(false); + ParseFormat(); break; case RecordType.YearEpoch: Parse1904(); @@ -175,12 +156,12 @@ async Task ReadHeaderAsync() this.xfMap = xfs.ToArray(); } - async Task InitSheet(CancellationToken cancel) + bool InitSheet() { rowIndex = -1; this.state = State.Initializing; - while (await reader.NextRecordAsync().ConfigureAwait(false)) + while (reader.NextRecord()) { if (reader.Type == RecordType.BOF) { @@ -203,7 +184,7 @@ async Task InitSheet(CancellationToken cancel) readBeginningOfSheet: while (true) { - await reader.NextRecordAsync().ConfigureAwait(false); + reader.NextRecord(); switch (reader.Type) { @@ -224,11 +205,11 @@ async Task InitSheet(CancellationToken cancel) } } done: - await ReadAsync(cancel).ConfigureAwait(false); + Read(); var result = LoadSchema(); if (!result) { - await ReadAsync(cancel).ConfigureAwait(false); + Read(); this.rowNumber = 1; } else @@ -250,17 +231,17 @@ int ParseXF() return ifmt; } - async Task ParseFormat() + void ParseFormat() { int ifmt = reader.ReadInt16(); string str; if (biffVersion == 0x0500) { - str = await reader.ReadByteString(1).ConfigureAwait(false); + str = reader.ReadByteString(1); } else { - str = await reader.ReadString16().ConfigureAwait(false); + str = reader.ReadString16(); } if (formats.ContainsKey(ifmt)) @@ -304,12 +285,12 @@ void ParseMulRK() } } - async Task ParseLabel() + void ParseLabel() { int rowIdx = reader.ReadUInt16(); int colIdx = reader.ReadUInt16(); int xfIdx = reader.ReadUInt16(); - string str = await reader.ReadByteString(2).ConfigureAwait(false); + string str = reader.ReadByteString(2); SetRowData(colIdx, new FieldInfo(str)); } @@ -350,7 +331,7 @@ void ParseNumber() SetRowData(colIdx, new FieldInfo(d, xfIdx)); } - async Task ParseFormula() + void 
ParseFormula() { var rowIdx = reader.ReadUInt16(); var colIdx = reader.ReadUInt16(); @@ -373,11 +354,11 @@ async Task ParseFormula() switch (rtype) { case 0: // string - var next = await reader.NextRecordAsync().ConfigureAwait(false); + var next = reader.NextRecord(); if (!next || reader.Type != RecordType.String) throw new InvalidDataException(); int len = reader.ReadUInt16(); byte kind = reader.ReadByte(); - var str = await reader.ReadStringAsync(len, kind == 0).ConfigureAwait(false); + var str = reader.ReadString(len, kind == 0); SetRowData(colIdx, new FieldInfo(str)); break; case 1: // boolean @@ -411,7 +392,7 @@ void SetRowData(int colIdx, FieldInfo cd) } - async Task NextRow() + bool NextRow() { // clear out any fields from previous row Array.Clear(this.fieldInfos, 0, this.fieldInfos.Length); @@ -420,7 +401,7 @@ async Task NextRow() { if (pendingRow == -1) { - if (!await reader.NextRecordAsync().ConfigureAwait(false)) + if (!reader.NextRecord()) { return false; } @@ -480,7 +461,7 @@ async Task NextRow() ParseLabelSST(); break; case RecordType.Label: - await ParseLabel().ConfigureAwait(false); + ParseLabel(); break; case RecordType.RK: ParseRK(); @@ -492,7 +473,7 @@ async Task NextRow() ParseNumber(); break; case RecordType.Formula: - await ParseFormula().ConfigureAwait(false); + ParseFormula(); break; case RecordType.Blank: case RecordType.BoolErr: @@ -538,7 +519,7 @@ int ParseDimension() return rowEnd; } - async Task LoadSheetRecord() + XlsSheetInfo LoadSheetRecord() { int offset = reader.ReadInt32(); byte visibility = reader.ReadByte(); @@ -546,13 +527,13 @@ async Task LoadSheetRecord() string name = biffVersion == 0x0500 - ? await reader.ReadByteString(1).ConfigureAwait(false) - : await reader.ReadString8().ConfigureAwait(false); + ? 
reader.ReadByteString(1) + : reader.ReadString8(); return new XlsSheetInfo(name, offset, visibility != 0); } - async Task LoadSharedStringTable() + void LoadSharedStringTable() { int totalString = reader.ReadInt32(); int uniqueString = reader.ReadInt32(); @@ -561,7 +542,7 @@ async Task LoadSharedStringTable() for (int i = 0; i < uniqueString; i++) { - var s = await reader.ReadString16().ConfigureAwait(false); + var s = reader.ReadString16(); strings[i] = s; } diff --git a/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs b/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs index 5217379..52c5f37 100644 --- a/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs +++ b/source/Sylvan.Data.Excel/Xlsb/XlsbWorkbookReader.cs @@ -5,8 +5,6 @@ using System.IO; using System.IO.Compression; using System.Text; -using System.Threading; -using System.Threading.Tasks; namespace Sylvan.Data.Excel.Xlsb; @@ -115,7 +113,7 @@ private protected override ref readonly FieldInfo GetFieldValue(int ordinal) return ref values[ordinal]; } - private protected override Task OpenWorksheetAsync(int sheetIdx, CancellationToken cancel) + private protected override bool OpenWorksheet(int sheetIdx) { var sheetName = sheetInfos[sheetIdx].Part; // the relationship is recorded as an absolute path @@ -137,7 +135,7 @@ private protected override Task OpenWorksheetAsync(int sheetIdx, Cancellat this.rowFieldCount = 0; this.curFieldCount = -1; this.sheetIdx = sheetIdx; - return Task.FromResult(InitializeSheet()); + return InitializeSheet(); } public override bool NextResult() @@ -153,7 +151,7 @@ public override bool NextResult() if (sheetIdx >= this.sheetInfos.Length) return false; - return OpenWorksheetAsync(sheetIdx, default).GetAwaiter().GetResult(); + return OpenWorksheet(sheetIdx); } bool InitializeSheet() diff --git a/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs b/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs index c33e008..dd3b1d5 100644 --- a/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs +++ 
b/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs @@ -181,6 +181,7 @@ async Task WriteInternal(DbDataReader data, string? worksheetName, xw.Write(""); row++; } + bool complete = true; while (true) { diff --git a/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs b/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs index 1900a4b..c6557da 100644 --- a/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs +++ b/source/Sylvan.Data.Excel/Xlsx/XlsxWorkbookReader.cs @@ -7,8 +7,6 @@ using System.Linq; using System.Text; using System.Xml; -using System.Threading.Tasks; -using System.Threading; using Sylvan.Data.Excel.Xlsx; #if !SPAN @@ -49,6 +47,7 @@ public XlsxWorkbookReader(Stream iStream, ExcelDataReaderOptions opts) : base(iS this.sheetStream = Stream.Null; package = new ZipArchive(iStream, ZipArchiveMode.Read, true); + var workbookPartName = OpenPackaging.GetWorkbookPart(package) ?? DefaultWorkbookPartName; @@ -177,7 +176,7 @@ private protected override ref readonly FieldInfo GetFieldValue(int ordinal) return ref base.GetFieldValue(ordinal); } - private protected override Task OpenWorksheetAsync(int sheetIdx, CancellationToken cancel) + private protected override bool OpenWorksheet(int sheetIdx) { var sheetName = sheetInfos[sheetIdx].Part; // the relationship is recorded as an absolute path @@ -185,7 +184,7 @@ private protected override Task OpenWorksheetAsync(int sheetIdx, Cancellat sheetName = sheetName.TrimStart('/'); var sheetPart = package.GetEntry(sheetName); if (sheetPart == null) - return Task.FromResult(false); + return false; this.sheetStream = sheetPart.Open(); @@ -237,7 +236,7 @@ private protected override Task OpenWorksheetAsync(int sheetIdx, Cancellat this.hasRows = InitializeSheet(); this.sheetIdx = sheetIdx; - return Task.FromResult(true); + return true; } public override bool NextResult() @@ -255,7 +254,7 @@ public override bool NextResult() return false; } - return OpenWorksheetAsync(sheetIdx, default).GetAwaiter().GetResult(); + return 
OpenWorksheet(sheetIdx); } bool InitializeSheet() From 3776a140a724727f88940c5a785f444b65cfd006 Mon Sep 17 00:00:00 2001 From: MarkPflug Date: Fri, 8 Sep 2023 11:33:54 -0700 Subject: [PATCH 2/2] Add ExcelDataWriter.CreateAsync --- docs/Async.md | 9 +- docs/ReleaseNotes.md | 3 +- .../ExcelDataWriterTests.cs | 132 +++++++++++++++++- .../Sylvan.Data.Excel.Tests.csproj | 2 +- source/Sylvan.Data.Excel.Tests/TestStream.cs | 1 + source/Sylvan.Data.Excel/ExcelDataWriter.cs | 132 +++++++++++++++--- .../ExcelDataWriterOptions.cs | 6 + .../Sylvan.Data.Excel/Xlsb/XlsbDataWriter.cs | 10 ++ .../Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs | 9 ++ 9 files changed, 280 insertions(+), 24 deletions(-) diff --git a/docs/Async.md b/docs/Async.md index 54cc1ec..709c5ae 100644 --- a/docs/Async.md +++ b/docs/Async.md @@ -2,8 +2,10 @@ ExcelDataReader and ExcelDataWriter both support "async" operations. In order to ensure proper async behavior, you must use the `CreateAsync` -method to create instances. In this mode, the entire file will be -asynchronously buffered into memory. +method to create instances. In this mode, the entire file must be +buffered in memory, and all IO will be handled asynchronously. + +The CreateAsync methods are only supported on .NET Core versions. Reading: ``` @@ -19,6 +21,7 @@ while(await edr.ReadAsync()) Writing: ``` +// must use async disposal await using var edw = ExcelDataWriter.CreateAsync("jumbo.xlsx"); edw.WriteAsync(myDataReader, "MyData"); @@ -26,4 +29,4 @@ edw.WriteAsync(myOtherDataReader, "MoreData"); // when the ExcelDataWriter is asynchronously disposed // the buffered file is asynchronously written to the output. -``` \ No newline at end of file +``` diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 8daafe8..e9093b6 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -2,7 +2,8 @@ _0.4.16_ - Adds ExcelFileType class that exposes constants about supported Excel formats: extensions and content types. 
-- Add `Obsolete` to `TryOpenWorksheetAsync`, use `TryOpenWorksheet` instead. Information about future Async strategy will be forthcoming. +- Add `Obsolete` to `TryOpenWorksheetAsync`, use `TryOpenWorksheet` instead. +- Add `CreateAsync` to `ExcelDataReader` and `ExcelDataWriter` to enable async operation (not supported on .NET Framework versions). _0.4.15_ - Fix a bug that prevented .xlsx reader from working on .NET Framework versions. diff --git a/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs b/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs index ba6447e..0c1b765 100644 --- a/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs +++ b/source/Sylvan.Data.Excel.Tests/ExcelDataWriterTests.cs @@ -1,4 +1,5 @@ using Sylvan.Data.Csv; +using Sylvan.Testing; using System; using System.Collections; using System.Data.Common; @@ -7,6 +8,7 @@ using System.IO.Compression; using System.Linq; using System.Runtime.CompilerServices; +using System.Threading.Tasks; using Xunit; namespace Sylvan.Data.Excel; @@ -15,7 +17,7 @@ public class XlsxDataWriterTests : ExcelDataWriterTests { const string FileFormat = "{0}.xlsx"; - public override ExcelWorkbookType WorkbookType => ExcelWorkbookType.Excel; + public override ExcelWorkbookType WorkbookType => ExcelWorkbookType.ExcelXml; protected override string GetFile(string name) { @@ -43,6 +45,19 @@ public abstract class ExcelDataWriterTests public abstract ExcelWorkbookType WorkbookType { get; } public object Enumable { get; private set; } + void Validate(string file) + { + // simple validation that we can read back what we wrote. + using var edr = ExcelDataReader.Create(file); + while (edr.Read()) + { + for (int i = 0; i < edr.RowFieldCount; i++) + { + var value = edr.GetValue(i); + } + } + } + static void Unpack(string file, [CallerMemberName] string folder = null) { // useful for debugging. 
@@ -89,6 +104,7 @@ public void Simple() w.Write(reader); } //Open(f); + Validate(f); } [Fact] @@ -117,6 +133,7 @@ public void Decimal() w.Write(reader); } //Open(f); + Validate(f); } [Fact] @@ -142,6 +159,7 @@ public void Ints() { w.Write(reader); } + Validate(f); //Open(f); } @@ -170,6 +188,7 @@ public void CommonTypes() { w.Write(reader); } + Validate(f); //Open(f); } @@ -215,6 +234,7 @@ public void MultiSheet() reader = data.AsDataReader(); w.Write(reader); } + Validate(f); } [Fact] @@ -239,6 +259,7 @@ public void BigString() { w.Write(reader); } + Validate(f); //Open(f); } @@ -262,6 +283,7 @@ public void NullCharString() { w.Write(reader); } + Validate(f); //Open(f); } @@ -284,6 +306,7 @@ public void Boolean() { w.Write(reader); } + Validate(f); //Open(f); } @@ -301,6 +324,7 @@ public void JaggedData() { edw.Write(dr); } + Validate(f); //Open(f); } @@ -317,6 +341,7 @@ public void WhiteSpace() edw.Write(csv); } + Validate(f); using var edr = ExcelDataReader.Create(f); Assert.Equal(" a ", edr.GetName(0)); Assert.Equal(" b", edr.GetName(1)); @@ -350,6 +375,7 @@ public void Binary() { edw.Write(dr); } + Validate(f); //Open(f); } @@ -363,6 +389,7 @@ public void CharArray() edw.Write(dr); } + Validate(f); // read back the created file and assert everything is as we expected using (var edr = ExcelDataReader.Create(f)) { @@ -399,6 +426,7 @@ public void Char() { edw.Write(dr); } + Validate(f); //Open(f); } @@ -423,6 +451,7 @@ public void Byte() { edw.Write(dr); } + Validate(f); //Open(f); } @@ -455,6 +484,7 @@ public void GuidData() { edw.Write(dr); } + Validate(f); //Open(f); } @@ -480,10 +510,104 @@ public void TimeSpanData() { edw.Write(dr); } + Validate(f); //Open(f); } -#if NET6_0_OR_GREATER +#if ASYNC + [Fact] + public async Task Async() { + var f = GetFile(); + + var r = new Random(); + var data = + Enumerable.Range(1, 4096) + .Select( + i => new + { + Id = i, //int32 + Name = "Name" + i, //string + ValueInt = r.Next(), // another, bigger int + ValueDouble = 
Math.PI * i, // double + Decimal = 1.25m * i, + Date = DateTime.Today.AddHours(i), + } + ).AsDataReader(); + + await using (var edw = await ExcelDataWriter.CreateAsync(f)) + { + await edw.WriteAsync(data); + } + Validate(f); + //Open(f); + } + + [Fact] + public async Task StreamAsync() + { + var f = GetFile(); + + var r = new Random(); + var data = + Enumerable.Range(1, 4096) + .Select( + i => new + { + Id = i, //int32 + Name = "Name" + i, //string + ValueInt = r.Next(), // another, bigger int + ValueDouble = Math.PI * i, // double + Decimal = 1.25m * i, + Date = DateTime.Today.AddHours(i), + } + ).AsDataReader(); + var s = File.Create(f); + var testStream = new TestStream(s); + await using (var edw = await ExcelDataWriter.CreateAsync(testStream, WorkbookType)) + { + await edw.WriteAsync(data); + } + Assert.False(testStream.IsClosed); + + testStream.Close(); + Validate(f); + //Open(f); + } + + [Fact] + public async Task OwnedStreamAsync() + { + var f = GetFile(); + + var r = new Random(); + var data = + Enumerable.Range(1, 4096) + .Select( + i => new + { + Id = i, //int32 + Name = "Name" + i, //string + ValueInt = r.Next(), // another, bigger int + ValueDouble = Math.PI * i, // double + Decimal = 1.25m * i, + Date = DateTime.Today.AddHours(i), + } + ).AsDataReader(); + var s = File.Create(f); + var testStream = new TestStream(s); + + var opts = new ExcelDataWriterOptions { OwnsStream = true }; + await using (var edw = await ExcelDataWriter.CreateAsync(testStream, WorkbookType, opts)) + { + await edw.WriteAsync(data); + } + Assert.True(testStream.IsClosed); + Validate(f); + //Open(f); + } +#endif + +#if DATEONLY [Fact] public void DateOnly() @@ -504,6 +628,8 @@ public void DateOnly() { w.Write(reader); } + + Validate(f); //Open(f); } @@ -526,6 +652,8 @@ public void TimeOnly() { w.Write(reader); } + + Validate(f); //Open(f); } diff --git a/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj b/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj 
index 4f3a65b..254f722 100644 --- a/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj +++ b/source/Sylvan.Data.Excel.Tests/Sylvan.Data.Excel.Tests.csproj @@ -8,7 +8,7 @@ - $(DefineConstants);ASYNC + $(DefineConstants);ASYNC;DATEONLY diff --git a/source/Sylvan.Data.Excel.Tests/TestStream.cs b/source/Sylvan.Data.Excel.Tests/TestStream.cs index 9a817d9..4d2eef3 100644 --- a/source/Sylvan.Data.Excel.Tests/TestStream.cs +++ b/source/Sylvan.Data.Excel.Tests/TestStream.cs @@ -28,6 +28,7 @@ public TestStream(Stream stream) public override void Close() { this.IsClosed = true; + stream.Close(); base.Close(); } diff --git a/source/Sylvan.Data.Excel/ExcelDataWriter.cs b/source/Sylvan.Data.Excel/ExcelDataWriter.cs index 91e7279..49066d9 100644 --- a/source/Sylvan.Data.Excel/ExcelDataWriter.cs +++ b/source/Sylvan.Data.Excel/ExcelDataWriter.cs @@ -1,6 +1,5 @@ using Sylvan.Data.Excel.Xlsx; using System; -using System.Collections.Generic; using System.Data.Common; using System.IO; using System.Threading; @@ -11,22 +10,34 @@ namespace Sylvan.Data.Excel; /// /// Writes data to excel files. /// -public abstract class ExcelDataWriter : IDisposable +public abstract class ExcelDataWriter : + IDisposable +#if ASYNC + , IAsyncDisposable +#endif { + + bool isAsync; +#if ASYNC + Stream? outputStream; +#endif + bool ownsStream; - readonly Stream stream; + readonly Stream stream; private protected readonly bool truncateStrings; +#if ASYNC + /// /// Creates a new ExcelDataWriter. /// - public static ExcelDataWriter Create(string file, ExcelDataWriterOptions? options = null) + public static async Task CreateAsync(string file, ExcelDataWriterOptions? options = null, CancellationToken cancel = default) { options = options ?? 
ExcelDataWriterOptions.Default; var type = ExcelDataReader.GetWorkbookType(file); var stream = File.Create(file); - var w = Create(stream, type, options); + var w = await CreateAsync(stream, type, options, cancel).ConfigureAwait(false); w.ownsStream = true; return w; } @@ -34,36 +45,123 @@ public static ExcelDataWriter Create(string file, ExcelDataWriterOptions? option /// /// Creates a new ExcelDataWriter. /// - public static ExcelDataWriter Create(Stream stream, ExcelWorkbookType type, ExcelDataWriterOptions? options = null) + public static async Task CreateAsync(Stream stream, ExcelWorkbookType type, ExcelDataWriterOptions? options = null, CancellationToken cancel = default) { options = options ?? ExcelDataWriterOptions.Default; + ExcelDataWriter writer; + var ms = new Sylvan.IO.PooledMemoryStream(); switch (type) { case ExcelWorkbookType.ExcelXml: - { - var w = new XlsxDataWriter(stream, options); - return w; - } + writer = new XlsxDataWriter(ms, options); + break; #if NET6_0_OR_GREATER case ExcelWorkbookType.ExcelBinary: - { - var w = new Xlsb.XlsbDataWriter(stream, options); - return w; - } + writer = new Xlsb.XlsbDataWriter(ms, options); + break; #endif + default: + throw new NotSupportedException(); + } + writer.isAsync = true; + writer.outputStream = stream; + // HACK: I want this method to be async to have symmetry with the `string filename` overload. + await Task.CompletedTask.ConfigureAwait(false); + return writer; + } + +#endif + + /// + /// Creates a new ExcelDataWriter. + /// + public static ExcelDataWriter Create(string file, ExcelDataWriterOptions? options = null) + { + options = options ?? ExcelDataWriterOptions.Default; + var type = ExcelDataReader.GetWorkbookType(file); + var stream = File.Create(file); + var w = Create(stream, type, options); + w.ownsStream = true; + return w; + } + + /// + /// Creates a new ExcelDataWriter. + /// + public static ExcelDataWriter Create(Stream stream, ExcelWorkbookType type, ExcelDataWriterOptions? 
options = null) + { + options = options ?? ExcelDataWriterOptions.Default; + try + { + switch (type) + { + case ExcelWorkbookType.ExcelXml: + return new XlsxDataWriter(stream, options); +#if NET6_0_OR_GREATER + case ExcelWorkbookType.ExcelBinary: + return new Xlsb.XlsbDataWriter(stream, options); +#endif + } + throw new NotSupportedException(); + } + catch + { + if (options?.OwnsStream == true) + { + stream.Dispose(); + } + throw; } - throw new NotSupportedException(); } /// public virtual void Dispose() { - if (ownsStream) - this.stream.Dispose(); + if (isAsync) + { + throw new InvalidOperationException(); + } + else + { + if (ownsStream) + { + this.stream.Dispose(); + } + } } +#if ASYNC + + /// + public virtual async ValueTask DisposeAsync() + { + if (isAsync) + { + // outputStream should never be null here + if (outputStream != null) + { + this.stream.Seek(0, SeekOrigin.Begin); + await this.stream.CopyToAsync(this.outputStream!, CancellationToken.None).ConfigureAwait(true); + if (ownsStream) + { + await this.outputStream!.DisposeAsync().ConfigureAwait(true); + } + } + } + else + { + if (ownsStream) + { + await this.stream.DisposeAsync().ConfigureAwait(true); + } + } + } +#endif + private protected ExcelDataWriter(Stream stream, ExcelDataWriterOptions options) { + this.isAsync = false; + this.ownsStream = options.OwnsStream; this.stream = stream; this.truncateStrings = options.TruncateStrings; } diff --git a/source/Sylvan.Data.Excel/ExcelDataWriterOptions.cs b/source/Sylvan.Data.Excel/ExcelDataWriterOptions.cs index 8af211c..ca938cd 100644 --- a/source/Sylvan.Data.Excel/ExcelDataWriterOptions.cs +++ b/source/Sylvan.Data.Excel/ExcelDataWriterOptions.cs @@ -21,8 +21,14 @@ public ExcelDataWriterOptions() { this.TruncateStrings = false; this.CompressionLevel = CompressionLevel.Fastest; + this.OwnsStream = false; } + /// + /// Indicates if the ExcelDataWriter owns the output stream and handle disposal. 
+ /// + public bool OwnsStream { get; set; } + /// /// Indicates if string values should be truncated to the limit of Excel, which allows a maximum of 32k characters. /// diff --git a/source/Sylvan.Data.Excel/Xlsb/XlsbDataWriter.cs b/source/Sylvan.Data.Excel/Xlsb/XlsbDataWriter.cs index de773c2..218f2f0 100644 --- a/source/Sylvan.Data.Excel/Xlsb/XlsbDataWriter.cs +++ b/source/Sylvan.Data.Excel/Xlsb/XlsbDataWriter.cs @@ -826,6 +826,16 @@ public override void Dispose() this.zipArchive.Dispose(); base.Dispose(); } + +#if ASYNC + public override ValueTask DisposeAsync() + { + this.Close(); + this.zipArchive.Dispose(); + return base.DisposeAsync(); + } +#endif + } #endif \ No newline at end of file diff --git a/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs b/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs index dd3b1d5..adef2d8 100644 --- a/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs +++ b/source/Sylvan.Data.Excel/Xlsx/XlsxDataWriter.cs @@ -539,4 +539,13 @@ public override void Dispose() this.zipArchive.Dispose(); base.Dispose(); } + +#if ASYNC + public override ValueTask DisposeAsync() + { + this.Close(); + this.zipArchive.Dispose(); + return base.DisposeAsync(); + } +#endif }