From 2d4d332cae21cda0e44e62fac0e7296f78229b50 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Wed, 20 Nov 2024 01:37:27 +0700 Subject: [PATCH] Fixed encoding provider loading on .NET Framework (for .NET Standard 2.0 target) (#1036) * Lucene.Net.Analysis.Ja.Tools.ConnectionCostsWriter: Added using for Lucene.Net.Support * Lucene.Net.Analysis.Kuromoji + Lucene.Net.Analysis.SmartCn: Added EncodingProviderInitializer classes to ensure we don't load the encoding provider on a .NET Framework runtime when targeting netstandard2.0 (fixes #1025) --- .../JapaneseTokenizerFactory.cs | 7 +-- .../Util/EncodingProviderInitializer.cs | 52 +++++++++++++++++++ .../Tools/ConnectionCostsWriter.cs | 3 +- .../Tools/DictionaryBuilder.cs | 10 ++-- .../AnalyzerProfile.cs | 7 +-- .../Util/EncodingProviderInitializer.cs | 52 +++++++++++++++++++ .../Startup.cs | 7 ++- 7 files changed, 120 insertions(+), 18 deletions(-) create mode 100644 src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs create mode 100644 src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs index e1a183e574..5595375e4a 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs @@ -74,11 +74,8 @@ public JapaneseTokenizerFactory(IDictionary args) static JapaneseTokenizerFactory() { -#if FEATURE_ENCODINGPROVIDERS - // Support for EUC-JP encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 - var encodingProvider = System.Text.CodePagesEncodingProvider.Instance; - System.Text.Encoding.RegisterProvider(encodingProvider); -#endif + // LUCENENET: Support for EUC-JP encoding. 
See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 + EncodingProviderInitializer.EnsureInitialized(); } public virtual void Inform(IResourceLoader loader) diff --git a/src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs b/src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs new file mode 100644 index 0000000000..b2ba1ce6c0 --- /dev/null +++ b/src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs @@ -0,0 +1,52 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; +using System.Threading; + +namespace Lucene.Net.Util +{ + /// <summary> + /// Loads the <see cref="EncodingProvider"/> for the current runtime for support of + /// EUC-JP encoding. + /// </summary> + internal static class EncodingProviderInitializer + { + private static int initialized; + + private static bool IsNetFramework => +#if NETSTANDARD2_0 + RuntimeInformation.FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase); +#elif NET40_OR_GREATER + true; +#else + false; +#endif + + [Conditional("FEATURE_ENCODINGPROVIDERS")] + public static void EnsureInitialized() + { + // Only allow a single thread to call this + if (0 != Interlocked.CompareExchange(ref initialized, 1, 0)) return; + +#if FEATURE_ENCODINGPROVIDERS + if (!IsNetFramework) + { + Initialize(); + } +#endif + } + +#if FEATURE_ENCODINGPROVIDERS + // NOTE: CodePagesEncodingProvider.Instance loads early, so we need this in a separate method to ensure + // that it isn't executed until after we know which runtime we are on. + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Initialize() + { + // Support for EUC-JP encoding. 
See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + } +#endif + } +} diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs index 936999db26..b6c6c1bd5c 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs @@ -2,6 +2,7 @@ using Lucene.Net.Codecs; using Lucene.Net.Diagnostics; using Lucene.Net.Store; +using Lucene.Net.Support; using System.Diagnostics; using System.IO; @@ -37,7 +38,7 @@ public ConnectionCostsWriter(int forwardSize, int backwardSize) this.forwardSize = forwardSize; this.backwardSize = backwardSize; //this.costs = new short[backwardSize][forwardSize]; - this.costs = Support.RectangularArrays.ReturnRectangularArray(backwardSize, forwardSize); + this.costs = RectangularArrays.ReturnRectangularArray(backwardSize, forwardSize); } public void Add(int forwardId, int backwardId, int cost) diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs index c6f222290c..19a5abbb65 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs @@ -1,4 +1,5 @@ -using System; +using Lucene.Net.Util; +using System; using Console = Lucene.Net.Util.SystemConsole; namespace Lucene.Net.Analysis.Ja.Util @@ -35,11 +36,8 @@ public enum DictionaryFormat { IPADIC, UNIDIC }; static DictionaryBuilder() { -#if FEATURE_ENCODINGPROVIDERS - // Support for EUC-JP encoding. 
See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 - var encodingProvider = System.Text.CodePagesEncodingProvider.Instance; - System.Text.Encoding.RegisterProvider(encodingProvider); -#endif + // LUCENENET: Support for EUC-JP encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 + EncodingProviderInitializer.EnsureInitialized(); } public static void Build(DictionaryFormat format, diff --git a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs index 9ae93b526e..70c85a4400 100644 --- a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs +++ b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs @@ -58,11 +58,8 @@ static AnalyzerProfile() // from ever being loaded). private static void Init() { -#if FEATURE_ENCODINGPROVIDERS - // Support for GB2312 encoding. See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 - var encodingProvider = System.Text.CodePagesEncodingProvider.Instance; - System.Text.Encoding.RegisterProvider(encodingProvider); -#endif + // LUCENENET: Support for GB2312 encoding. 
See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 + EncodingProviderInitializer.EnsureInitialized(); string dirName = "smartcn-data"; //string propName = "analysis.properties"; diff --git a/src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs b/src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs new file mode 100644 index 0000000000..827ad2d5fc --- /dev/null +++ b/src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs @@ -0,0 +1,52 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; +using System.Threading; + +namespace Lucene.Net.Util +{ + /// <summary> + /// Loads the <see cref="EncodingProvider"/> for the current runtime for support of + /// GB2312 encoding. + /// </summary> + internal static class EncodingProviderInitializer + { + private static int initialized; + + private static bool IsNetFramework => +#if NETSTANDARD2_0 + RuntimeInformation.FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase); +#elif NET40_OR_GREATER + true; +#else + false; +#endif + + [Conditional("FEATURE_ENCODINGPROVIDERS")] + public static void EnsureInitialized() + { + // Only allow a single thread to call this + if (0 != Interlocked.CompareExchange(ref initialized, 1, 0)) return; + +#if FEATURE_ENCODINGPROVIDERS + if (!IsNetFramework) + { + Initialize(); + } +#endif + } + +#if FEATURE_ENCODINGPROVIDERS + // NOTE: CodePagesEncodingProvider.Instance loads early, so we need this in a separate method to ensure + // that it isn't executed until after we know which runtime we are on. + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Initialize() + { + // Support for GB2312 encoding. 
See: https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0 + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + } +#endif + } +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Startup.cs b/src/Lucene.Net.Tests.Analysis.Common/Startup.cs index 07033b1b05..c0400a3514 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Startup.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Startup.cs @@ -27,7 +27,12 @@ protected override void TestFrameworkSetUp() // require it to be added as well when using Hunspell, but there is no reason to load // the code pages by default in Lucene.Net.Analysis.Common. It should be added by consumers // or Hunspell that require it. + // + // Note this is in the test project, which never uses netstandard2.0. If we were using + // netstandard2.0, we would need an extra check to determine if we are on .NET Framework, + // which doesn't support encoding providers. See EncodingProviderInitializer in the + // Lucene.Net.Analysis.Kuromoji project. System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance); #endif } -} \ No newline at end of file +}