From a28d1f2ffe164a455d2a27edce85e8b3e2d75f21 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Tue, 3 Dec 2024 16:19:17 -0400 Subject: [PATCH] Universe data frames improvements (#8433) * Default Data to null for ETFConstituentUniverses. The data collection will be assigned only if needed. This allows data column to be filtered from dataframes since it will always be null for all constituents. * Make base data collection aggregator reader fall back to BaseDataCollection After instantiating the collection type, fall back to the base BaseDataCollection to aggregate data if the type is not a base data collection. * Minor change * Minor change * Update pythonnet to 2.0.41 * Ignore data column for every flattened universe dataframe * Filter empty collections columns in data frames * Allow snake case named attributes in PythonSlice * Remove PythonSlice Data Python class Pythonnet handles dynamic objects behavior --- .../QuantConnect.Algorithm.CSharp.csproj | 2 +- .../QuantConnect.Algorithm.Framework.csproj | 2 +- .../QuantConnect.Algorithm.Python.csproj | 2 +- Algorithm/QuantConnect.Algorithm.csproj | 2 +- .../QuantConnect.AlgorithmFactory.csproj | 2 +- Common/Python/PandasData.cs | 12 +- Common/Python/PythonSlice.cs | 42 +---- Common/QuantConnect.csproj | 2 +- .../BaseDataCollectionAggregatorReader.cs | 6 +- Engine/QuantConnect.Lean.Engine.csproj | 2 +- Indicators/QuantConnect.Indicators.csproj | 2 +- Report/QuantConnect.Report.csproj | 2 +- Research/QuantConnect.Research.csproj | 2 +- Tests/Algorithm/AlgorithmHistoryTests.cs | 159 ++++++++++++++++++ Tests/QuantConnect.Tests.csproj | 5 +- Tests/TestData/portfolio_targets.csv | 10 ++ 16 files changed, 200 insertions(+), 54 deletions(-) create mode 100644 Tests/TestData/portfolio_targets.csv diff --git a/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj b/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj index 6f280b01c81f..bf9c3ff262da 100644 --- a/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj +++ 
b/Algorithm.CSharp/QuantConnect.Algorithm.CSharp.csproj @@ -34,7 +34,7 @@ portable - + diff --git a/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj b/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj index 5287a774fbe3..fa1f265c6a54 100644 --- a/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj +++ b/Algorithm.Framework/QuantConnect.Algorithm.Framework.csproj @@ -30,7 +30,7 @@ LICENSE - + diff --git a/Algorithm.Python/QuantConnect.Algorithm.Python.csproj b/Algorithm.Python/QuantConnect.Algorithm.Python.csproj index f9515feb4527..eab90b412dad 100644 --- a/Algorithm.Python/QuantConnect.Algorithm.Python.csproj +++ b/Algorithm.Python/QuantConnect.Algorithm.Python.csproj @@ -39,7 +39,7 @@ - + diff --git a/Algorithm/QuantConnect.Algorithm.csproj b/Algorithm/QuantConnect.Algorithm.csproj index 62df04f561ab..af31c8e90149 100644 --- a/Algorithm/QuantConnect.Algorithm.csproj +++ b/Algorithm/QuantConnect.Algorithm.csproj @@ -30,7 +30,7 @@ LICENSE - + diff --git a/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj b/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj index 5b9f264f991a..29f454310502 100644 --- a/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj +++ b/AlgorithmFactory/QuantConnect.AlgorithmFactory.csproj @@ -29,7 +29,7 @@ LICENSE - + diff --git a/Common/Python/PandasData.cs b/Common/Python/PandasData.cs index 9db4e22dd17e..9fecd394c7c4 100644 --- a/Common/Python/PandasData.cs +++ b/Common/Python/PandasData.cs @@ -710,7 +710,17 @@ public void Add(DateTime time, object input, bool overrideValues) } else if (value != null) { - ShouldFilter = false; + if (value is ICollection enumerable) + { + if (enumerable.Count != 0) + { + ShouldFilter = false; + } + } + else + { + ShouldFilter = false; + } } } diff --git a/Common/Python/PythonSlice.cs b/Common/Python/PythonSlice.cs index dfb43e44b192..f445d65d4f4d 100644 --- a/Common/Python/PythonSlice.cs +++ b/Common/Python/PythonSlice.cs @@ -27,29 +27,6 @@ namespace QuantConnect.Python 
public class PythonSlice : Slice { private readonly Slice _slice; - private static readonly PyObject _converter; - - static PythonSlice() - { - using (Py.GIL()) - { - // Python Data class: Converts custom data (PythonData) into a python object''' - _converter = PyModule.FromString("converter", - "class Data(object):\n" + - " def __init__(self, data):\n" + - " self.data = data\n" + - " members = [attr for attr in dir(data) if not callable(attr) and not attr.startswith(\"__\")]\n" + - " for member in members:\n" + - " setattr(self, member, getattr(data, member))\n" + - " for kvp in data.GetStorageDictionary():\n" + - " name = kvp.Key.replace('-',' ').replace('.',' ').title().replace(' ', '')\n" + - " value = kvp.Value if isinstance(kvp.Value, float) else kvp.Value\n" + - " setattr(self, name, value)\n" + - - " def __str__(self):\n" + - " return self.data.ToString()"); - } - } /// /// Initializes a new instance of the class @@ -122,24 +99,7 @@ public override dynamic this[Symbol symbol] { get { - var data = _slice[symbol]; - - var dynamicData = data as DynamicData; - if (dynamicData != null) - { - try - { - using (Py.GIL()) - { - return _converter.InvokeMethod("Data", new[] { dynamicData.ToPython() }); - } - } - catch - { - // NOP - } - } - return data; + return _slice[symbol]; } } diff --git a/Common/QuantConnect.csproj b/Common/QuantConnect.csproj index 36a35dd62caf..e9fac89bc4a2 100644 --- a/Common/QuantConnect.csproj +++ b/Common/QuantConnect.csproj @@ -35,7 +35,7 @@ - + diff --git a/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs b/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs index 87f079209b2b..79753928c746 100644 --- a/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs +++ b/Engine/DataFeeds/BaseDataCollectionAggregatorReader.cs @@ -38,11 +38,15 @@ public class BaseDataCollectionAggregatorReader : TextSubscriptionDataSourceRead /// The subscription's configuration /// The date this factory was produced to read data for /// True if we're in 
live mode, false for backtesting + /// The object storage for data persistence public BaseDataCollectionAggregatorReader(IDataCacheProvider dataCacheProvider, SubscriptionDataConfig config, DateTime date, bool isLiveMode, IObjectStore objectStore) : base(dataCacheProvider, config, date, isLiveMode, objectStore) { - _collectionType = config.Type; + // if the type is not a BaseDataCollection, we'll default to BaseDataCollection. + // e.g. custom Python dynamic folding collections need to be aggregated into a BaseDataCollection, + // but they implement PythonData, so casting an instance of PythonData to BaseDataCollection will fail. + _collectionType = config.Type.IsAssignableTo(typeof(BaseDataCollection)) ? config.Type : typeof(BaseDataCollection); } /// diff --git a/Engine/QuantConnect.Lean.Engine.csproj b/Engine/QuantConnect.Lean.Engine.csproj index 1de590cd277b..5d4f43417f73 100644 --- a/Engine/QuantConnect.Lean.Engine.csproj +++ b/Engine/QuantConnect.Lean.Engine.csproj @@ -43,7 +43,7 @@ - + diff --git a/Indicators/QuantConnect.Indicators.csproj b/Indicators/QuantConnect.Indicators.csproj index 9f63a76a7822..b044f2f14180 100644 --- a/Indicators/QuantConnect.Indicators.csproj +++ b/Indicators/QuantConnect.Indicators.csproj @@ -32,7 +32,7 @@ - + diff --git a/Report/QuantConnect.Report.csproj b/Report/QuantConnect.Report.csproj index 0561cc784707..d35d6b1d3e04 100644 --- a/Report/QuantConnect.Report.csproj +++ b/Report/QuantConnect.Report.csproj @@ -41,7 +41,7 @@ LICENSE - + diff --git a/Research/QuantConnect.Research.csproj b/Research/QuantConnect.Research.csproj index c77eedc742be..ba538810071f 100644 --- a/Research/QuantConnect.Research.csproj +++ b/Research/QuantConnect.Research.csproj @@ -34,7 +34,7 @@ - + diff --git a/Tests/Algorithm/AlgorithmHistoryTests.cs b/Tests/Algorithm/AlgorithmHistoryTests.cs index 56ec1e5cf528..4a36a76fcb85 100644 --- a/Tests/Algorithm/AlgorithmHistoryTests.cs +++ b/Tests/Algorithm/AlgorithmHistoryTests.cs @@ -37,6 +37,7 @@ using 
QuantConnect.Data.Fundamental; using QuantConnect.Data.UniverseSelection; using QuantConnect.Tests.Common.Data.Fundamental; +using QuantConnect.Logging; namespace QuantConnect.Tests.Algorithm { @@ -3296,6 +3297,164 @@ assert isinstance(constituent, Fundamental), f'Unflattened DF: expected a list o } } + [Test] + public void CSharpCustomUniverseHistoryDataFramesHaveExpectedFormat() + { + var algorithm = GetAlgorithm(new DateTime(2015, 01, 15)); + var universe = algorithm.AddUniverse("CustomUniverse", Resolution.Daily, (x) => x.Select(y => y.Symbol)); + + using (Py.GIL()) + { + PythonInitializer.Initialize(); + algorithm.SetPandasConverter(); + + using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat", + $@" +from AlgorithmImports import * + +def get_universe_history(algorithm, universe, flatten): + return algorithm.history(universe, 3, flatten=flatten) + "); + + dynamic getUniverseHistory = testModule.GetAttr("get_universe_history"); + var df = getUniverseHistory(algorithm, universe, false); + var flattenedDf = getUniverseHistory(algorithm, universe, true); + + Func getWeight = (data) => data.Weight; + AssertCustomUniverseDataFrames(df, flattenedDf, getWeight); + + var columns = ((List)flattenedDf.columns.to_list().As>()) + .Select(column => column.InvokeMethod("__str__").GetAndDispose()); + CollectionAssert.DoesNotContain(columns, "data"); + } + } + + [Test] + public void PythonCustomUniverseHistoryDataFramesHaveExpectedFormat() + { + var algorithm = GetAlgorithm(new DateTime(2015, 01, 15)); + + using (Py.GIL()) + { + PythonInitializer.Initialize(); + algorithm.SetPandasConverter(); + + using var testModule = PyModule.FromString("PythonCustomUniverseHistoryDataFramesHaveExpectedFormat", + $@" +from AlgorithmImports import * + +class CustomUniverseData(PythonData): + + def get_source(self, config: SubscriptionDataConfig, date: datetime, is_live_mode: bool) -> SubscriptionDataSource: + return 
SubscriptionDataSource('TestData/portfolio_targets.csv', + SubscriptionTransportMedium.LOCAL_FILE, + FileFormat.FOLDING_COLLECTION) + + def reader(self, config: SubscriptionDataConfig, line: str, date: datetime, is_live_mode: bool) -> BaseData: + # Skip the header row. + if not line[0].isnumeric(): + return None + items = line.split(',') + data = CustomUniverseData() + data.end_time = datetime.strptime(items[0], '%Y-%m-%d') + data.time = data.end_time - timedelta(1) + data.symbol = Symbol.create(items[1], SecurityType.EQUITY, Market.USA) + data['weight'] = float(items[2]) + return data + +def get_universe_history(algorithm, flatten): + universe = algorithm.add_universe(CustomUniverseData, 'CustomUniverse', Resolution.DAILY, lambda alt_coarse: [x.symbol for x in alt_coarse]) + return algorithm.history(universe, 3, flatten=flatten) + + "); + + dynamic getUniverseHistory = testModule.GetAttr("get_universe_history"); + var df = getUniverseHistory(algorithm, false); + var flattenedDf = getUniverseHistory(algorithm, true); + + Func getWeight = (data) => Convert.ToDecimal(data.GetProperty("weight")); + AssertCustomUniverseDataFrames(df, flattenedDf, getWeight); + } + } + + public class CustomUniverseData : BaseDataCollection + { + public decimal Weight { get; private set; } + + public override SubscriptionDataSource GetSource(SubscriptionDataConfig config, DateTime date, bool isLiveMode) + { + return new SubscriptionDataSource("TestData/portfolio_targets.csv", + SubscriptionTransportMedium.LocalFile, + FileFormat.FoldingCollection); + } + + public override BaseData Reader(SubscriptionDataConfig config, string line, DateTime date, bool isLiveMode) + { + var csv = line.Split(','); + + try + { + var endTime = DateTime.ParseExact(csv[0], "yyyy-MM-dd", CultureInfo.InvariantCulture); + var symbol = Symbol.Create(csv[1], SecurityType.Equity, Market.USA); + var weight = Convert.ToDecimal(csv[2], CultureInfo.InvariantCulture); + + return new CustomUniverseData + { + Symbol = 
symbol, + Time = endTime - TimeSpan.FromDays(1), + EndTime = endTime, + Weight = weight + }; + } + catch + { + return null; + } + } + } + + private static void AssertCustomUniverseDataFrames(dynamic df, dynamic flattenedDf, Func getWeight) + where T : BaseData + { + var expectedDates = new List + { + new DateTime(2015, 01, 13), + new DateTime(2015, 01, 14), + new DateTime(2015, 01, 15), + }; + + var flattenedDfDates = ((List)flattenedDf.index.get_level_values(0).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, flattenedDfDates); + + var dfDates = ((List)df.index.get_level_values(1).to_list().As>()).Distinct().ToList(); + CollectionAssert.AreEqual(expectedDates, dfDates); + + df = df.droplevel(0); // drop symbol just to make access easier + foreach (var date in expectedDates) + { + using var pyDate = date.ToPython(); + var constituents = (List)df.loc[pyDate].As>(); + var flattendDfConstituents = flattenedDf.loc[pyDate]; + + CollectionAssert.IsNotEmpty(constituents); + Assert.AreEqual(flattendDfConstituents.shape[0].As(), constituents.Count); + + var constituentsSymbols = constituents.Select(x => x.Symbol).ToList(); + var flattendDfConstituentsSymbols = ((List)flattendDfConstituents.index.to_list().As>()).ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsSymbols, constituentsSymbols); + + var constituentsWeights = constituents.Select(x => getWeight(x)).ToList(); + var flattendDfConstituentsWeights = constituentsSymbols + .Select(symbol => flattendDfConstituents.loc[symbol.ToPython()]["weight"].As()) + .Cast() + .ToList(); + CollectionAssert.AreEqual(flattendDfConstituentsWeights, constituentsWeights); + } + + Log.Debug((string)df.to_string()); + Log.Debug((string)flattenedDf.to_string()); + } + private static void AssertDesNotThrowPythonException(Action action) { try diff --git a/Tests/QuantConnect.Tests.csproj b/Tests/QuantConnect.Tests.csproj index 3255f586bf4f..c2424bbac661 100644 --- a/Tests/QuantConnect.Tests.csproj +++ 
b/Tests/QuantConnect.Tests.csproj @@ -33,7 +33,7 @@ - + @@ -240,6 +240,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/Tests/TestData/portfolio_targets.csv b/Tests/TestData/portfolio_targets.csv new file mode 100644 index 000000000000..5ea7d8305c8f --- /dev/null +++ b/Tests/TestData/portfolio_targets.csv @@ -0,0 +1,10 @@ +Date,Symbol,Weight +2015-01-13,TLT,0.6403554273566532 +2015-01-13,GLD,0.2966005853128983 +2015-01-13,IWM,0.06304398733044848 +2015-01-14,USO,0.5873635006180897 +2015-01-14,GLD,0.19451676316704644 +2015-01-14,TLT,0.2181197362148639 +2015-01-15,IWM,0.563722959965805 +2015-01-15,SPY,0.3327542780145993 +2015-01-15,TLT,0.10352276201959563