Skip to content

Commit

Permalink
Universe data frames improvements (#8433)
Browse files Browse the repository at this point in the history
* Default Data to null for ETFConstituentUniverses.

The data collection will be assigned only if needed. This allows the data column to be filtered out of dataframes, since it will always be null for all constituents.

* Make base data collection aggregator reader fall back to BaseDataCollection

After instantiating the collection type, fall back to the base BaseDataCollection to aggregate data if the type is not a base data collection.

* Minor change

* Minor change

* Update pythonnet to 2.0.41

* Ignore data column for every flattened universe dataframe

* Filter empty collections columns in data frames

* Allow snake case named attributes in PythonSlice

* Remove PythonSlice Data Python class

Pythonnet handles dynamic objects behavior
  • Loading branch information
jhonabreul authored Dec 3, 2024
1 parent f431454 commit a28d1f2
Show file tree
Hide file tree
Showing 16 changed files with 200 additions and 54 deletions.
2 changes: 1 addition & 1 deletion Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<DebugType>portable</DebugType>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Accord" Version="3.6.0" />
<PackageReference Include="Accord.Fuzzy" Version="3.6.0" />
<PackageReference Include="Accord.MachineLearning" Version="3.6.0" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Accord" Version="3.6.0" />
<PackageReference Include="Accord.Math" Version="3.6.0" />
<PackageReference Include="Accord.Statistics" Version="3.6.0" />
Expand Down
2 changes: 1 addition & 1 deletion Algorithm.Python/QuantConnect.Algorithm.Python.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
<Compile Include="..\Common\Properties\SharedAssemblyInfo.cs" Link="Properties\SharedAssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
</ItemGroup>
<ItemGroup>
<Content Include="OptionUniverseFilterGreeksShortcutsRegressionAlgorithm.py" />
Expand Down
2 changes: 1 addition & 1 deletion Algorithm/QuantConnect.Algorithm.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
<PackageReference Include="NodaTime" Version="3.0.5" />
Expand Down
2 changes: 1 addition & 1 deletion AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="NodaTime" Version="3.0.5" />
</ItemGroup>
<ItemGroup>
Expand Down
12 changes: 11 additions & 1 deletion Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,17 @@ public void Add(DateTime time, object input, bool overrideValues)
}
else if (value != null)
{
ShouldFilter = false;
if (value is ICollection enumerable)
{
if (enumerable.Count != 0)
{
ShouldFilter = false;
}
}
else
{
ShouldFilter = false;
}
}
}

Expand Down
42 changes: 1 addition & 41 deletions Common/Python/PythonSlice.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,6 @@ namespace QuantConnect.Python
public class PythonSlice : Slice
{
private readonly Slice _slice;
private static readonly PyObject _converter;

// Static initializer: while holding the Python GIL, compiles an in-memory Python module
// named "converter" from the source text below and stores it in _converter.
static PythonSlice()
{
using (Py.GIL())
{
// Python Data class: converts custom data (PythonData) into a Python object.
// __init__ keeps a reference to the wrapped object, copies every attribute whose name does not
// start with "__" onto the instance, and then copies each entry of data.GetStorageDictionary(),
// using the key with '-' and '.' treated as word separators, title-cased and joined without spaces.
// __str__ delegates to the wrapped object's ToString().
_converter = PyModule.FromString("converter",
"class Data(object):\n" +
" def __init__(self, data):\n" +
" self.data = data\n" +
" members = [attr for attr in dir(data) if not callable(attr) and not attr.startswith(\"__\")]\n" +
" for member in members:\n" +
" setattr(self, member, getattr(data, member))\n" +
" for kvp in data.GetStorageDictionary():\n" +
" name = kvp.Key.replace('-',' ').replace('.',' ').title().replace(' ', '')\n" +
" value = kvp.Value if isinstance(kvp.Value, float) else kvp.Value\n" +
" setattr(self, name, value)\n" +

" def __str__(self):\n" +
" return self.data.ToString()");
}
}

/// <summary>
/// Initializes a new instance of the <see cref="PythonSlice"/> class
Expand Down Expand Up @@ -122,24 +99,7 @@ public override dynamic this[Symbol symbol]
{
get
{
var data = _slice[symbol];

var dynamicData = data as DynamicData;
if (dynamicData != null)
{
try
{
using (Py.GIL())
{
return _converter.InvokeMethod("Data", new[] { dynamicData.ToPython() });
}
}
catch
{
// NOP
}
}
return data;
return _slice[symbol];
}
}

Expand Down
2 changes: 1 addition & 1 deletion Common/QuantConnect.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<Message Text="SelectedOptimization $(SelectedOptimization)" Importance="high" />
</Target>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="CloneExtensions" Version="1.3.0" />
<PackageReference Include="fasterflect" Version="3.0.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
Expand Down
6 changes: 5 additions & 1 deletion Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,15 @@ public class BaseDataCollectionAggregatorReader : TextSubscriptionDataSourceRead
/// <param name="config">The subscription's configuration</param>
/// <param name="date">The date this factory was produced to read data for</param>
/// <param name="isLiveMode">True if we're in live mode, false for backtesting</param>
/// <param name="objectStore">The object storage for data persistence</param>
public BaseDataCollectionAggregatorReader(IDataCacheProvider dataCacheProvider, SubscriptionDataConfig config, DateTime date,
    bool isLiveMode, IObjectStore objectStore)
    : base(dataCacheProvider, config, date, isLiveMode, objectStore)
{
    // If the configured type is not a BaseDataCollection, default to BaseDataCollection.
    // e.g. custom Python dynamic folding collections need to be aggregated into a BaseDataCollection,
    // but they implement PythonData, so casting an instance of PythonData to BaseDataCollection will fail.
    // The collection type is assigned exactly once, here; no unconditional `config.Type` assignment precedes it.
    _collectionType = config.Type.IsAssignableTo(typeof(BaseDataCollection)) ? config.Type : typeof(BaseDataCollection);
}

/// <summary>
Expand Down
2 changes: 1 addition & 1 deletion Engine/QuantConnect.Lean.Engine.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
<Message Text="SelectedOptimization $(SelectedOptimization)" Importance="high" />
</Target>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="fasterflect" Version="3.0.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
Expand Down
2 changes: 1 addition & 1 deletion Indicators/QuantConnect.Indicators.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
<Message Text="SelectedOptimization $(SelectedOptimization)" Importance="high" />
</Target>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
</ItemGroup>
<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion Report/QuantConnect.Report.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
<PackageLicenseFile>LICENSE</PackageLicenseFile>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Deedle" Version="2.1.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.2" />
Expand Down
2 changes: 1 addition & 1 deletion Research/QuantConnect.Research.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<ItemGroup>
<PackageReference Include="Plotly.NET" Version="3.0.1" />
<PackageReference Include="Plotly.NET.Interactive" Version="3.0.2" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="NodaTime" Version="3.0.5" />
</ItemGroup>
<ItemGroup>
Expand Down
159 changes: 159 additions & 0 deletions Tests/Algorithm/AlgorithmHistoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
using QuantConnect.Data.Fundamental;
using QuantConnect.Data.UniverseSelection;
using QuantConnect.Tests.Common.Data.Fundamental;
using QuantConnect.Logging;

namespace QuantConnect.Tests.Algorithm
{
Expand Down Expand Up @@ -3296,6 +3297,164 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o
}
}

/// <summary>
/// Requests three periods of history for a C#-defined custom universe (<see cref="CustomUniverseData"/>)
/// through the Python history API, both unflattened and flattened, and checks the resulting data frames:
/// dates, constituents and weights must match, and the flattened frame must not expose a "data" column.
/// </summary>
[Test]
public void CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat()
{
    var algorithm = GetAlgorithm(new DateTime(2015, 01, 15));
    var universe = algorithm.AddUniverse<CustomUniverseData>("CustomUniverse", Resolution.Daily, (x) => x.Select(y => y.Symbol));

    using (Py.GIL())
    {
        PythonInitializer.Initialize();
        algorithm.SetPandasConverter();

        // The module is named after this test so it does not share a module name with the Python-universe test.
        // The Python source lives in a verbatim string, so its indentation is significant and must start at column 0.
        using var testModule = PyModule.FromString("CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat",
            $@"
from AlgorithmImports import *

def get_universe_history(algorithm, universe, flatten):
    return algorithm.history(universe, 3, flatten=flatten)
");

        dynamic getUniverseHistory = testModule.GetAttr("get_universe_history");
        var df = getUniverseHistory(algorithm, universe, false);
        var flattenedDf = getUniverseHistory(algorithm, universe, true);

        Func<CustomUniverseData, decimal> getWeight = (data) => data.Weight;
        AssertCustomUniverseDataFrames(df, flattenedDf, getWeight);

        // The flattened frame must not contain the (always empty) "data" column.
        var columns = ((List<PyObject>)flattenedDf.columns.to_list().As<List<PyObject>>())
            .Select(column => column.InvokeMethod("__str__").GetAndDispose<string>());
        CollectionAssert.DoesNotContain(columns, "data");
    }
}

/// <summary>
/// Defines a custom universe data type in Python (a PythonData subclass read as a folding collection),
/// requests three periods of universe history both unflattened and flattened, and checks that the
/// resulting data frames agree on dates, constituents and weights.
/// </summary>
[Test]
public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat()
{
    var algorithm = GetAlgorithm(new DateTime(2015, 01, 15));

    using (Py.GIL())
    {
        PythonInitializer.Initialize();
        algorithm.SetPandasConverter();

        // The Python source lives in a verbatim string, so its indentation is significant:
        // class, method and function bodies must be indented or the module fails to compile.
        using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat",
            $@"
from AlgorithmImports import *

class CustomUniverseData(PythonData):
    def get_source(self, config: SubscriptionDataConfig, date: datetime, is_live_mode: bool) -> SubscriptionDataSource:
        return SubscriptionDataSource('TestData/portfolio_targets.csv',
                                      SubscriptionTransportMedium.LOCAL_FILE,
                                      FileFormat.FOLDING_COLLECTION)

    def reader(self, config: SubscriptionDataConfig, line: str, date: datetime, is_live_mode: bool) -> BaseData:
        # Skip the header row.
        if not line[0].isnumeric():
            return None
        items = line.split(',')
        data = CustomUniverseData()
        data.end_time = datetime.strptime(items[0], '%Y-%m-%d')
        data.time = data.end_time - timedelta(1)
        data.symbol = Symbol.create(items[1], SecurityType.EQUITY, Market.USA)
        data['weight'] = float(items[2])
        return data

def get_universe_history(algorithm, flatten):
    universe = algorithm.add_universe(CustomUniverseData, 'CustomUniverse', Resolution.DAILY, lambda alt_coarse: [x.symbol for x in alt_coarse])
    return algorithm.history(universe, 3, flatten=flatten)
");

        dynamic getUniverseHistory = testModule.GetAttr("get_universe_history");
        var df = getUniverseHistory(algorithm, false);
        var flattenedDf = getUniverseHistory(algorithm, true);

        // Python custom data stores 'weight' as a dynamic property rather than a typed member.
        Func<PythonData, decimal> getWeight = (data) => Convert.ToDecimal(data.GetProperty("weight"));
        AssertCustomUniverseDataFrames(df, flattenedDf, getWeight);
    }
}

/// <summary>
/// Custom universe data used by the tests: one constituent (symbol plus weight) per CSV line of
/// TestData/portfolio_targets.csv, read as a folding collection.
/// </summary>
public class CustomUniverseData : BaseDataCollection
{
    /// <summary>
    /// Weight parsed from the third CSV column.
    /// </summary>
    public decimal Weight { get; private set; }

    /// <summary>
    /// Points the subscription at the local portfolio targets CSV file, to be read as a folding collection.
    /// </summary>
    public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode)
        => new SubscriptionDataSource("TestData/portfolio_targets.csv",
            SubscriptionTransportMedium.LocalFile,
            FileFormat.FoldingCollection);

    /// <summary>
    /// Parses a "date,symbol,weight" line into a new <see cref="CustomUniverseData"/> instance.
    /// </summary>
    /// <returns>The parsed data point, or null when any field of the line cannot be parsed (e.g. the header row)</returns>
    public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode)
    {
        var fields = line.Split(',');

        try
        {
            var periodEnd = DateTime.ParseExact(fields[0], "yyyy-MM-dd", CultureInfo.InvariantCulture);
            var ticker = fields[1];
            var rawWeight = fields[2];

            return new CustomUniverseData
            {
                Symbol = Symbol.Create(ticker, SecurityType.Equity, Market.USA),
                Time = periodEnd.Subtract(TimeSpan.FromDays(1)),
                EndTime = periodEnd,
                Weight = Convert.ToDecimal(rawWeight, CultureInfo.InvariantCulture)
            };
        }
        catch
        {
            // Any malformed line (header, missing columns, bad number) is skipped.
            return null;
        }
    }
}

/// <summary>
/// Checks that the unflattened and flattened custom universe history data frames describe the same data:
/// both must cover 2015-01-13 through 2015-01-15 and, for each date, the flattened frame must have one row
/// per constituent of the unflattened frame, with the same symbols (in order) and the same weights.
/// </summary>
/// <typeparam name="T">The constituent data type held in the unflattened frame</typeparam>
/// <param name="df">The unflattened history data frame</param>
/// <param name="flattenedDf">The flattened history data frame</param>
/// <param name="getWeight">Extracts the weight from a constituent</param>
private static void AssertCustomUniverseDataFrames<T>(dynamic df, dynamic flattenedDf, Func<T, decimal> getWeight)
    where T : BaseData
{
    // 2015-01-13, 2015-01-14 and 2015-01-15
    var expectedDates = Enumerable.Range(13, 3).Select(day => new DateTime(2015, 01, day)).ToList();

    // In the flattened frame the date is the first index level
    var flattenedIndexDates = (List<DateTime>)flattenedDf.index.get_level_values(0).to_list().As<List<DateTime>>();
    CollectionAssert.AreEqual(expectedDates, flattenedIndexDates.Distinct().ToList());

    // In the unflattened frame the date is the second index level
    var indexDates = (List<DateTime>)df.index.get_level_values(1).to_list().As<List<DateTime>>();
    CollectionAssert.AreEqual(expectedDates, indexDates.Distinct().ToList());

    // Remove the symbol level so rows can be looked up by date alone
    df = df.droplevel(0);

    foreach (var expectedDate in expectedDates)
    {
        using var pyDate = expectedDate.ToPython();
        var members = (List<T>)df.loc[pyDate].As<List<T>>();
        var flattenedRows = flattenedDf.loc[pyDate];

        CollectionAssert.IsNotEmpty(members);
        Assert.AreEqual(flattenedRows.shape[0].As<int>(), members.Count);

        var memberSymbols = members.Select(member => member.Symbol).ToList();
        var flattenedSymbols = ((List<Symbol>)flattenedRows.index.to_list().As<List<Symbol>>()).ToList();
        CollectionAssert.AreEqual(flattenedSymbols, memberSymbols);

        var memberWeights = members.Select(getWeight).ToList();
        var flattenedWeights = new List<decimal>();
        foreach (var memberSymbol in memberSymbols)
        {
            flattenedWeights.Add((decimal)flattenedRows.loc[memberSymbol.ToPython()]["weight"].As<decimal>());
        }
        CollectionAssert.AreEqual(flattenedWeights, memberWeights);
    }

    Log.Debug((string)df.to_string());
    Log.Debug((string)flattenedDf.to_string());
}

private static void AssertDesNotThrowPythonException(Action action)
{
try
Expand Down
5 changes: 4 additions & 1 deletion Tests/QuantConnect.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(SolutionDir)\.nuget\NuGet.targets" Condition="Exists('$(SolutionDir)\.nuget\NuGet.targets')" />
<ItemGroup>
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.40" />
<PackageReference Include="QuantConnect.pythonnet" Version="2.0.41" />
<PackageReference Include="Accord" Version="3.6.0" />
<PackageReference Include="Accord.Math" Version="3.6.0" />
<PackageReference Include="Common.Logging" Version="3.4.1" />
Expand Down Expand Up @@ -240,6 +240,9 @@
<None Include="TestData\daily-stock-picker-live.csv">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="TestData\portfolio_targets.csv">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="TestData\FillForwardBars.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
10 changes: 10 additions & 0 deletions Tests/TestData/portfolio_targets.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Date,Symbol,Weight
2015-01-13,TLT,0.6403554273566532
2015-01-13,GLD,0.2966005853128983
2015-01-13,IWM,0.06304398733044848
2015-01-14,USO,0.5873635006180897
2015-01-14,GLD,0.19451676316704644
2015-01-14,TLT,0.2181197362148639
2015-01-15,IWM,0.563722959965805
2015-01-15,SPY,0.3327542780145993
2015-01-15,TLT,0.10352276201959563

0 comments on commit a28d1f2

Please sign in to comment.