Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Overhaul TokenStream ICloseable/IDisposable Patterns, #271 #1058

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,10 @@ public override void End()
this.offsetAtt.SetOffset(finalOffset, finalOffset);
}

protected override void Dispose(bool disposing)
public override void Close()
{
base.Dispose(disposing);
if (disposing)
{
this.initialized = false;
}
base.Close();
this.initialized = false;
}

public override void Reset()
Expand Down Expand Up @@ -570,13 +567,10 @@ private bool IsStopWord(string text)
return stopWords != null && stopWords.Contains(text);
}

protected override void Dispose(bool disposing)
public override void Close()
{
base.Dispose(disposing);
if (disposing)
{
this.str = null;
}
base.Close();
this.str = null;
}

public override void Reset()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Links two <see cref="PrefixAwareTokenFilter"/>.
/// <para/>
/// <b>NOTE:</b> This filter might not behave correctly if used with custom
/// <b>NOTE:</b> This filter might not behave correctly if used with custom
/// <see cref="Lucene.Net.Util.IAttribute"/>s, i.e. <see cref="Lucene.Net.Util.IAttribute"/>s other than
/// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// </summary>
public class PrefixAndSuffixAwareTokenFilter : TokenStream
{
private readonly PrefixAwareTokenFilter suffix;

public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix)
public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix)
: base(suffix)
{
prefix = new PrefixAwareTokenFilterAnonymousClass(this, prefix, input);
Expand All @@ -40,7 +40,7 @@ private sealed class PrefixAwareTokenFilterAnonymousClass : PrefixAwareTokenFilt
{
private readonly PrefixAndSuffixAwareTokenFilter outerInstance;

public PrefixAwareTokenFilterAnonymousClass(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input)
public PrefixAwareTokenFilterAnonymousClass(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input)
: base(prefix, input)
{
this.outerInstance = outerInstance;
Expand All @@ -56,7 +56,7 @@ private sealed class PrefixAwareTokenFilterAnonymousClass2 : PrefixAwareTokenFil
{
private readonly PrefixAndSuffixAwareTokenFilter outerInstance;

public PrefixAwareTokenFilterAnonymousClass2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix)
public PrefixAwareTokenFilterAnonymousClass2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix)
: base(prefix, suffix)
{
this.outerInstance = outerInstance;
Expand Down Expand Up @@ -90,18 +90,14 @@ public override void Reset()
suffix.Reset();
}

protected override void Dispose(bool disposing)
public override void Close()
{
if (disposing)
{
suffix.Dispose();
}
base.Dispose(disposing); // LUCENENET specific - disposable pattern requires calling the base class implementation
suffix.Close();
}

public override void End()
{
suffix.End();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Joins two token streams and leaves the last token of the first stream available
/// to be used when updating the token values in the second stream based on that token.
///
///
/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
/// <para/>
/// <b>NOTE:</b> This filter might not behave correctly if used with custom
/// <b>NOTE:</b> This filter might not behave correctly if used with custom
/// <see cref="Lucene.Net.Util.IAttribute"/>s, i.e. <see cref="Lucene.Net.Util.IAttribute"/>s other than
/// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// </summary>
Expand Down Expand Up @@ -175,14 +175,10 @@ public override void End()
suffix.End();
}

protected override void Dispose(bool disposing)
public override void Close()
{
if (disposing)
{
prefix.Dispose();
suffix.Dispose();
}
base.Dispose(disposing); // LUCENENET specific - disposable pattern requires calling the base class implementation
prefix.Close();
suffix.Close();
}

public override void Reset()
Expand Down Expand Up @@ -211,4 +207,4 @@ public virtual TokenStream Suffix
set => this.suffix = value;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,22 @@ namespace Lucene.Net.Analysis.Standard

/// <summary>
/// A grammar-based tokenizer constructed with JFlex (and then ported to .NET)
///
///
/// <para> This should be a good tokenizer for most European-language documents:
///
///
/// <list type="bullet">
/// <item><description>Splits words at punctuation characters, removing punctuation. However, a
/// <item><description>Splits words at punctuation characters, removing punctuation. However, a
/// dot that's not followed by whitespace is considered part of a token.</description></item>
/// <item><description>Splits words at hyphens, unless there's a number in the token, in which case
/// the whole token is interpreted as a product number and is not split.</description></item>
/// <item><description>Recognizes email addresses and internet hostnames as one token.</description></item>
/// </list>
///
///
/// </para>
/// <para>Many applications have specific tokenizer needs. If this tokenizer does
/// not suit your application, please consider copying this source code
/// directory to your project and maintaining your own grammar-based tokenizer.
///
///
/// <see cref="ClassicTokenizer"/> was named <see cref="StandardTokenizer"/> in Lucene versions prior to 3.1.
/// As of 3.1, <see cref="StandardTokenizer"/> implements Unicode text segmentation,
/// as specified by UAX#29.
Expand Down Expand Up @@ -83,7 +83,7 @@ public sealed class ClassicTokenizer : Tokenizer

/// <summary>
/// Set the max allowed token length. Any token longer
/// than this is skipped.
/// than this is skipped.
/// </summary>
public int MaxTokenLength
{
Expand All @@ -103,7 +103,7 @@ public int MaxTokenLength
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="input"> The input reader
///
///
/// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
public ClassicTokenizer(LuceneVersion matchVersion, Reader input)
: base(input)
Expand All @@ -112,7 +112,7 @@ public ClassicTokenizer(LuceneVersion matchVersion, Reader input)
}

/// <summary>
/// Creates a new <see cref="ClassicTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// Creates a new <see cref="ClassicTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// </summary>
public ClassicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, Reader input)
: base(factory, input)
Expand All @@ -135,7 +135,7 @@ private void Init(LuceneVersion matchVersion)
private IOffsetAttribute offsetAtt;
private IPositionIncrementAttribute posIncrAtt;
private ITypeAttribute typeAtt;

/*
* (non-Javadoc)
*
Expand Down Expand Up @@ -193,13 +193,10 @@ public override sealed void End()
posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
}

protected override void Dispose(bool disposing)
public override void Close()
{
base.Dispose(disposing);
if (disposing)
{
scanner.YyReset(m_input);
}
base.Close();
scanner.YyReset(m_input);
}

public override void Reset()
Expand All @@ -209,4 +206,4 @@ public override void Reset()
skippedPositions = 0;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ namespace Lucene.Net.Analysis.Standard
/// A grammar-based tokenizer constructed with JFlex.
/// <para>
/// As of Lucene version 3.1, this class implements the Word Break rules from the
/// Unicode Text Segmentation algorithm, as specified in
/// Unicode Text Segmentation algorithm, as specified in
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
/// <p/>
/// </para>
/// <para>Many applications have specific tokenizer needs. If this tokenizer does
/// not suit your application, please consider copying this source code
/// directory to your project and maintaining your own grammar-based tokenizer.
///
///
/// </para>
/// <para>You must specify the required <see cref="LuceneVersion"/>
/// compatibility when creating <see cref="StandardTokenizer"/>:
Expand All @@ -58,25 +58,25 @@ public sealed class StandardTokenizer : Tokenizer
private IStandardTokenizerInterface scanner;

public const int ALPHANUM = 0;
/// @deprecated (3.1)
/// @deprecated (3.1)
[Obsolete("(3.1)")]
public const int APOSTROPHE = 1;
/// @deprecated (3.1)
/// @deprecated (3.1)
[Obsolete("(3.1)")]
public const int ACRONYM = 2;
/// @deprecated (3.1)
/// @deprecated (3.1)
[Obsolete("(3.1)")]
public const int COMPANY = 3;
public const int EMAIL = 4;
/// @deprecated (3.1)
/// @deprecated (3.1)
[Obsolete("(3.1)")]
public const int HOST = 5;
public const int NUM = 6;
/// @deprecated (3.1)
/// @deprecated (3.1)
[Obsolete("(3.1)")]
public const int CJ = 7;

/// @deprecated (3.1)
/// @deprecated (3.1)
[Obsolete("(3.1)")]
public const int ACRONYM_DEP = 8;

Expand Down Expand Up @@ -111,7 +111,7 @@ public sealed class StandardTokenizer : Tokenizer

/// <summary>
/// Set the max allowed token length. Any token longer
/// than this is skipped.
/// than this is skipped.
/// </summary>
public int MaxTokenLength
{
Expand All @@ -133,7 +133,7 @@ public int MaxTokenLength
/// </summary>
/// <param name="matchVersion"> Lucene compatibility version - See <see cref="StandardTokenizer"/> </param>
/// <param name="input"> The input reader
///
///
/// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
public StandardTokenizer(LuceneVersion matchVersion, TextReader input)
: base(input)
Expand All @@ -142,7 +142,7 @@ public StandardTokenizer(LuceneVersion matchVersion, TextReader input)
}

/// <summary>
/// Creates a new <see cref="StandardTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// Creates a new <see cref="StandardTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// </summary>
public StandardTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
: base(factory, input)
Expand Down Expand Up @@ -248,13 +248,10 @@ public override sealed void End()
posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
}

protected override void Dispose(bool disposing)
public override void Close()
{
base.Dispose(disposing);
if (disposing)
{
scanner.YyReset(m_input);
}
base.Close();
scanner.YyReset(m_input);
}

public override void Reset()
Expand All @@ -264,4 +261,4 @@ public override void Reset()
skippedPositions = 0;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ namespace Lucene.Net.Analysis.Standard
*/

/// <summary>
/// This class implements Word Break rules from the Unicode Text Segmentation
/// This class implements Word Break rules from the Unicode Text Segmentation
/// algorithm, as specified in `
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
/// URLs and email addresses are also tokenized according to the relevant RFCs.
/// <para/>
/// Tokens produced are of the following types:
Expand Down Expand Up @@ -89,7 +89,7 @@ public sealed class UAX29URLEmailTokenizer : Tokenizer

/// <summary>
/// Set the max allowed token length. Any token longer
/// than this is skipped.
/// than this is skipped.
/// </summary>
public int MaxTokenLength
{
Expand Down Expand Up @@ -214,13 +214,10 @@ public override sealed void End()
posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
}

protected override void Dispose(bool disposing)
public override void Close()
{
base.Dispose(disposing);
if (disposing)
{
scanner.YyReset(m_input);
}
base.Close();
scanner.YyReset(m_input);
}

public override void Reset()
Expand All @@ -230,4 +227,4 @@ public override void Reset()
skippedPositions = 0;
}
}
}
}
Loading
Loading