Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xls label fix #183

Merged
merged 7 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Sylvan.Data.Excel Release Notes

_0.4.25_
- Fix some issues with reading Excel 95 .xls files.

_0.4.24_
- Fix for reading certain .xls files.
- Handle writing NaN and infinity values.
Expand Down
28 changes: 27 additions & 1 deletion source/Sylvan.Data.Excel.Tests/ExternalDataTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#if NETCOREAPP3_0_OR_GREATER

using Sylvan.Data.Csv;
using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand Down Expand Up @@ -231,7 +232,6 @@ public void GetValue(string path)
GetErrorAsNull = true
};
var edr = ExcelDataReader.Create(path, opts);

do
{
while (edr.Read())
Expand All @@ -243,6 +243,32 @@ public void GetValue(string path)
}
} while (edr.NextResult());
}

/// <summary>
/// Reads every worksheet of the given Excel file and writes each one to a CSV file
/// named "{filename}-{worksheet name}.csv", exercising the reader end to end.
/// </summary>
/// <param name="filename">Path of the Excel file relative to the test data root; null entries are ignored.</param>
[Theory]
[MemberData(nameof(GetExcelFiles))]
public void ToCsv(string filename)
{
    if (filename == null) return;

    var root = GetRootPath();
    var path = Path.Combine(root, filename);

    var opts = new ExcelDataReaderOptions
    {
        Schema = ExcelSchema.NoHeaders,
        GetErrorAsNull = true
    };
    // Dispose the reader so the workbook file is released when the test finishes.
    using var edr = ExcelDataReader.Create(path, opts);

    do
    {
        var outPath = $"{filename}-{edr.WorksheetName}.csv";
        var dir = Path.GetDirectoryName(outPath);
        // GetDirectoryName returns null/empty when outPath has no directory part,
        // and Directory.CreateDirectory rejects both, so only create it when present.
        if (!string.IsNullOrEmpty(dir))
        {
            Directory.CreateDirectory(dir);
        }
        // Write to the same path the directory was prepared for.
        using var w = CsvDataWriter.Create(outPath);
        w.Write(edr.AsVariableField(e => e.RowFieldCount));
    } while (edr.NextResult());
}
}

#endif
1 change: 1 addition & 0 deletions source/Sylvan.Data.Excel/ExcelDataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,7 @@ public sealed override object GetValue(int ordinal)
var kind = fmt?.Kind ?? FormatKind.Number;
switch (kind)
{
case FormatKind.String:
case FormatKind.Number:
var doubleValue = GetDouble(ordinal);
unchecked
Expand Down
2 changes: 1 addition & 1 deletion source/Sylvan.Data.Excel/Sylvan.Data.Excel.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<PropertyGroup>
<TargetFrameworks>net6.0;netstandard2.1;netstandard2.0</TargetFrameworks>
<LangVersion>latest</LangVersion>
<VersionPrefix>0.4.24</VersionPrefix>
<VersionPrefix>0.4.25</VersionPrefix>
<Description>A cross-platform .NET library for reading Excel data files.</Description>
<PackageTags>excel;xls;xlsx;xlsb;datareader</PackageTags>
<Nullable>enable</Nullable>
Expand Down
115 changes: 48 additions & 67 deletions source/Sylvan.Data.Excel/Xls/XlsWorkbookReader+RecordReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,57 +115,7 @@ public int ReadInt32()
{
return ReadByte() | ReadByte() << 8 | ReadByte() << 16 | ReadByte() << 24;
}

// Reads a string that is prefixed with a 16-bit character count and an options byte,
// then skips the trailing rich/asian data that follows the characters.
// Throws InvalidDataException when the length is negative or when a required
// Continue record is missing.
public string ReadString16()
{
// If the current record is already exhausted, the string must start in the
// following record, which has to be a Continue record.
if (bufferPos >= recordOff + recordLen)
{
var next = NextRecord();
if (!next || Type != RecordType.Continue)
throw new InvalidDataException();
}

// the length of the string in *characters*
int len = ReadInt16();
if (len < 0)
{
throw new InvalidDataException();
}
byte options = ReadByte();

// Option flags: bit 0x01 clear means compressed characters,
// 0x04 announces an asian data size, 0x08 announces a rich data count.
bool compressed = (options & 0x01) == 0;
bool asian = (options & 0x04) != 0;
bool rich = (options & 0x08) != 0;

// Optional 16-bit count, present only when the rich flag is set.
int richCount = 0;
if (rich)
richCount = ReadInt16();

// Optional 32-bit count, present only when the asian flag is set.
int asianCount = 0;
if (asian)
asianCount = ReadInt32();

var str = ReadStringBuffer(len, compressed);

// Number of trailing bytes to skip: richCount * 4 plus asianCount.
var remain = richCount * 4 + asianCount;

// Skip the trailing bytes, moving on to Continue records whenever the
// current record runs out before everything has been skipped.
while (remain > 0)
{
var avail = recordOff + recordLen - bufferPos;
var c = Math.Min(remain, avail);
remain -= c;
bufferPos += c;
Assert();
if (remain > 0)
{
var next = NextRecord();
if (!next || Type != RecordType.Continue)
throw new InvalidDataException();
}
}

return str;
}


static readonly Encoding Encoding1252 = Encoding.GetEncoding(1252);

Expand Down Expand Up @@ -238,20 +188,44 @@ internal string ReadStringBuffer(int charCount, bool compressed)

public string ReadByteString(int lenSize)
{
int len;
if (lenSize == 1)
len = ReadByte();
else
len = ReadInt16();

ReadStringBuffer(len, true);
var str = new string(strBuffer, 0, len);
return str;
int len =
lenSize == 1
? ReadByte()
: ReadInt16();

return ReadStringBuffer(len, true);
}

public string ReadString8()
{
int len = ReadByte();
MaybeContinueString();
var len = ReadByte();
return ReadString(len);
}

/// <summary>
/// Reads a string whose length is stored as a 16-bit prefix.
/// </summary>
public string ReadString16()
{
    // Step into the following Continue record first when the current record is used up.
    MaybeContinueString();
    return ReadString(ReadInt16());
}

/// <summary>
/// When the read position has reached the end of the current record, advances to the
/// next record, which must be a Continue record. Does nothing otherwise.
/// </summary>
/// <exception cref="InvalidDataException">There is no next record, or it is not a Continue record.</exception>
void MaybeContinueString()
{
    var recordEnd = recordOff + recordLen;
    if (bufferPos < recordEnd)
    {
        // data remains in the current record; nothing to do
        return;
    }

    if (!NextRecord() || Type != RecordType.Continue)
    {
        throw new InvalidDataException();
    }
}

public string ReadString(int len)
{
if (len < 0)
{
throw new InvalidDataException();
}
byte options = ReadByte();

bool compressed = (options & 0x01) == 0;
Expand All @@ -268,14 +242,21 @@ public string ReadString8()

var str = ReadStringBuffer(len, compressed);

for (int i = 0; i < richCount; i++)
{
ReadInt32();
}
var remain = richCount * 4 + asianCount;

for (int i = 0; i < asianCount; i++)
while (remain > 0)
{
ReadByte();
var avail = recordOff + recordLen - bufferPos;
var c = Math.Min(remain, avail);
remain -= c;
bufferPos += c;
Assert();
if (remain > 0)
{
var next = NextRecord();
if (!next || Type != RecordType.Continue)
throw new InvalidDataException();
}
}

return str;
Expand Down
60 changes: 47 additions & 13 deletions source/Sylvan.Data.Excel/Xls/XlsWorkbookReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ public XlsSheetInfo(string name, int offset, bool hidden) : base(name, hidden)
internal XlsWorkbookReader(Stream stream, ExcelDataReaderOptions options) : base(stream, options)
{
var pkg = new Ole2Package(stream);
var part = pkg.GetEntry("Workbook\0");
var part =
pkg.GetEntry("Workbook\0") ??
pkg.GetEntry("Book\0");

if (part == null)
throw new InvalidDataException();
var ps = part.Open();
Expand Down Expand Up @@ -254,15 +257,10 @@ int ParseXF()
void ParseFormat()
{
int ifmt = reader.ReadInt16();
string str;
if (biffVersion == 0x0500)
{
str = reader.ReadByteString(1);
}
else
{
str = reader.ReadString16();
}
string str =
biffVersion == 0x0500
? reader.ReadByteString(1)
: reader.ReadString16();

if (formats.ContainsKey(ifmt))
{
Expand Down Expand Up @@ -310,7 +308,38 @@ void ParseLabel()
int rowIdx = reader.ReadUInt16();
int colIdx = reader.ReadUInt16();
int xfIdx = reader.ReadUInt16();
string str = reader.ReadByteString(2);
int len = reader.ReadInt16();
if (len > 255) throw new InvalidDataException();
bool compressed = true;
if (biffVersion == 0x0500)
{
// apparently there are no flags in this version
}
else
{
byte flags = reader.ReadByte();
compressed = (flags & 1) == 0;
}

var str = reader.ReadStringBuffer(len, compressed);
SetRowData(colIdx, new FieldInfo(str));
}

/// <summary>
/// Parses an RString record: reads the cell position, the cell text, and then
/// consumes the formatting information that trails the text so the reader is
/// left at the end of the record data. The text is stored for the record's column.
/// </summary>
/// <exception cref="InvalidDataException">The stored string length is negative.</exception>
void ParseRString()
{
    // The row and XF indexes are not used here; they are read only to advance past them.
    _ = reader.ReadUInt16();
    int colIdx = reader.ReadUInt16();
    _ = reader.ReadUInt16();

    var len = reader.ReadInt16();
    // Reject a negative length up front, matching the check ReadString performs,
    // instead of handing it to ReadStringBuffer.
    if (len < 0)
    {
        throw new InvalidDataException();
    }
    var str = reader.ReadStringBuffer(len, true);

    // consume the formatting info: a one-byte count followed by that many 16-bit entries
    // NOTE(review): this matches the BIFF5 layout; confirm whether other versions can reach here.
    int runCount = reader.ReadByte();
    for (int i = 0; i < runCount; i++)
    {
        reader.ReadUInt16();
    }

    SetRowData(colIdx, new FieldInfo(str));
}

Expand Down Expand Up @@ -470,7 +499,10 @@ int NextRow()
}
else
{
throw new InvalidDataException();
peekRow = (ushort)(rowIndex + 1);
pendingRow = peekRow;
return 0;
//throw new InvalidDataException();
}
}
break;
Expand Down Expand Up @@ -516,10 +548,12 @@ int NextRow()
case RecordType.Formula:
ParseFormula();
break;
case RecordType.RString:
ParseRString();
break;
case RecordType.Blank:
case RecordType.BoolErr:
case RecordType.MulBlank:
case RecordType.RString:
break;
case RecordType.Array:
case RecordType.SharedFmla:
Expand Down