Skip to content

Commit

Permalink
create light lucene maintainer, builder utilities
Browse files Browse the repository at this point in the history
* commit

* fix build failed

* commit

* create lucene pool light

* style fix

* commit

* commit

* undo

* 123

* commit

* commit

* commit

* commit
  • Loading branch information
qiuhaotc authored Dec 8, 2020
1 parent 0bdead1 commit 1a6929b
Show file tree
Hide file tree
Showing 24 changed files with 1,344 additions and 142 deletions.
11 changes: 9 additions & 2 deletions src/CodeIndex.Common/CodeIndexConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ namespace CodeIndex.Common
public class CodeIndexConfiguration
{
public const char SplitChar = '|';
public const string CodeIndexesFolder = "CodeIndexes";
public const string ConfigurationIndexFolder = "Configuration";
public const string CodeIndexFolder = "CodeIndex";
public const string HintIndexFolder = "HintIndex";

public string LuceneIndex { get; set; } = string.Empty;
public string MonitorFolder { get; set; } = string.Empty;
Expand All @@ -15,8 +19,8 @@ public class CodeIndexConfiguration
public int SaveIntervalSeconds { get; set; } = 300;
public string LocalUrl { get; set; } = string.Empty;
public string MonitorFolderRealPath { get; set; } = string.Empty;
public string LuceneIndexForCode => luceneIndexForCode ??= GetIndexPath("CodeIndex");
public string LuceneIndexForHint => luceneIndexForHint ??= GetIndexPath("HintIndex");
public string LuceneIndexForCode => luceneIndexForCode ??= GetIndexPath(CodeIndexFolder);
public string LuceneIndexForHint => luceneIndexForHint ??= GetIndexPath(HintIndexFolder);
public string ExcludedExtensions { get; set; } = string.Empty;
public string ExcludedPaths { get; set; } = string.Empty;
public string IncludedExtensions { get; set; } = string.Empty;
Expand Down Expand Up @@ -56,5 +60,8 @@ string[] GetSplitStringArray(string excludedExtensions)

return excludedExtensions.Split(SplitChar, StringSplitOptions.RemoveEmptyEntries);
}

// Cache for LuceneConfigurationIndex; remains null until the property is first read.
string luceneConfigurationIndex;
/// <summary>
/// Path for the configuration index: the value of GetIndexPath(ConfigurationIndexFolder),
/// evaluated while the cache field is still null and reused on later reads.
/// </summary>
public string LuceneConfigurationIndex => luceneConfigurationIndex ??= GetIndexPath(ConfigurationIndexFolder);
}
}
65 changes: 61 additions & 4 deletions src/CodeIndex.Common/IndexConfig.cs
Original file line number Diff line number Diff line change
@@ -1,21 +1,78 @@
using System;
using System.Collections.Generic;
using System.IO;

namespace CodeIndex.Common
{
public class IndexConfig
{
public const char SplitChar = '|';

public Guid Pk { get; set; }
public string IndexName { get; set; }
public string MonitorFolder { get; set; }
public IEnumerable<string> IncludedExtensions { get; set; }
public IEnumerable<string> ExcludedExtensions { get; set; }
public int MaxContentHighlightLength { get; set; }
public IEnumerable<string> ExcludedPaths { get; set; }
public int SaveIntervalSeconds { get; set; }
public string OpenIDEUriFormat { get; set; }
public string MonitorFolderRealPath { get; set; }
public DateTime IndexCreatedDate { get; set; }
public DateTime IndexLastUpdatedDate { get; set; }

/// <summary>
/// Raw excluded-paths setting. Assigning it drops the cached
/// <see cref="ExcludedPathsArray"/> so the array is rebuilt on its next read.
/// </summary>
public string ExcludedPaths
{
    get
    {
        return excludedPaths;
    }

    set
    {
        excludedPathsArray = null;
        excludedPaths = value;
    }
}

/// <summary>
/// Raw included-extensions setting. Assigning it drops the cached
/// <see cref="IncludedExtensionsArray"/> so the array is rebuilt on its next read.
/// </summary>
public string IncludedExtensions
{
    get
    {
        return includedExtensions;
    }

    set
    {
        includedExtensionsArray = null;
        includedExtensions = value;
    }
}

/// <summary>
/// Raw excluded-extensions setting. Assigning it drops the cached
/// <see cref="ExcludedExtensionsArray"/> so the array is rebuilt on its next read.
/// </summary>
public string ExcludedExtensions
{
    get
    {
        return excludedExtensions;
    }

    set
    {
        excludedExtensionsArray = null;
        excludedExtensions = value;
    }
}

/// <summary>
/// <see cref="ExcludedPaths"/> split by GetSplitStringArray; built while the cache is null and reused afterwards.
/// </summary>
public string[] ExcludedPathsArray
{
    get
    {
        if (excludedPathsArray == null)
        {
            excludedPathsArray = GetSplitStringArray(ExcludedPaths);
        }

        return excludedPathsArray;
    }
}

/// <summary>
/// <see cref="IncludedExtensions"/> split by GetSplitStringArray; built while the cache is null and reused afterwards.
/// </summary>
public string[] IncludedExtensionsArray
{
    get
    {
        if (includedExtensionsArray == null)
        {
            includedExtensionsArray = GetSplitStringArray(IncludedExtensions);
        }

        return includedExtensionsArray;
    }
}

/// <summary>
/// <see cref="ExcludedExtensions"/> split by GetSplitStringArray; built while the cache is null and reused afterwards.
/// </summary>
public string[] ExcludedExtensionsArray
{
    get
    {
        if (excludedExtensionsArray == null)
        {
            excludedExtensionsArray = GetSplitStringArray(ExcludedExtensions);
        }

        return excludedExtensionsArray;
    }
}

/// <summary>
/// Builds the two index folder paths for this configuration:
/// parentFolder / IndexName / CodeIndexFolder and parentFolder / IndexName / HintIndexFolder.
/// </summary>
/// <param name="parentFolder">Folder under which the per-index folder (named after IndexName) is placed.</param>
/// <returns>The code index folder path and the hint index folder path, in that order.</returns>
public (string CodeIndexFolder, string HintIndexFolder) GetFolders(string parentFolder)
{
    // Both results share the same parentFolder/IndexName prefix, so combine it once.
    var indexRoot = Path.Combine(parentFolder, IndexName);
    var codeIndexFolder = Path.Combine(indexRoot, CodeIndexConfiguration.CodeIndexFolder);
    var hintIndexFolder = Path.Combine(indexRoot, CodeIndexConfiguration.HintIndexFolder);

    return (codeIndexFolder, hintIndexFolder);
}

/// <summary>
/// Splits a SplitChar-delimited setting into its non-empty parts.
/// </summary>
/// <param name="value">Delimited setting text; may be null or empty.</param>
/// <returns>An empty array for null/empty input, otherwise the parts with empty entries removed.</returns>
string[] GetSplitStringArray(string value) =>
    string.IsNullOrEmpty(value)
        ? Array.Empty<string>()
        : value.Split(SplitChar, StringSplitOptions.RemoveEmptyEntries);

// Lazily built caches behind the *Array properties; each one is reset to null by the matching string setter.
string[] excludedPathsArray;
string[] includedExtensionsArray;
string[] excludedExtensionsArray;
// Raw setting strings behind ExcludedPaths / IncludedExtensions / ExcludedExtensions (split on SplitChar when read as arrays).
string excludedPaths;
string includedExtensions;
string excludedExtensions;
}
}
6 changes: 4 additions & 2 deletions src/CodeIndex.Common/IndexStatus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
{
public enum IndexStatus
{
Created,
Idle,
Initializing,
Monitoring,
Deleting
Error,
Deleting,
Initialized
}
}
2 changes: 1 addition & 1 deletion src/CodeIndex.ConsoleApp/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ static void Main(string[] args)
var maintainer = new CodeFilesIndexMaintainer(config, logger);
maintainer.StartWatch();
initializer.InitializeIndex(config, out var failedIndexFiles);
maintainer.SetInitalizeFinishedToTrue(failedIndexFiles);
maintainer.SetInitializeFinishedToTrue(failedIndexFiles);

Console.WriteLine("Initialize complete");

Expand Down
216 changes: 216 additions & 0 deletions src/CodeIndex.IndexBuilder/CodeIndexBuilderLight.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using CodeIndex.Common;
using CodeIndex.Files;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;

namespace CodeIndex.IndexBuilder
{
public class CodeIndexBuilderLight : IDisposable
{
/// <summary>
/// Creates a builder over a code index pool and a hint index pool.
/// Every argument goes through the project's Require* guard before it is stored.
/// </summary>
/// <param name="name">Name used as a prefix in this builder's log messages.</param>
/// <param name="codeIndexPool">Pool that receives the code documents.</param>
/// <param name="hintIndexPool">Pool that receives the hint word documents.</param>
/// <param name="log">Logger used for progress and error messages.</param>
public CodeIndexBuilderLight(string name, LucenePoolLight codeIndexPool, LucenePoolLight hintIndexPool, ILog log)
{
    name.RequireNotNullOrEmpty(nameof(name));
    Name = name;

    codeIndexPool.RequireNotNull(nameof(codeIndexPool));
    CodeIndexPool = codeIndexPool;

    hintIndexPool.RequireNotNull(nameof(hintIndexPool));
    HintIndexPool = hintIndexPool;

    log.RequireNotNull(nameof(log));
    Log = log;
}

/// <summary>Name given at construction; prefixes this builder's log messages.</summary>
public string Name { get; }

/// <summary>Pool holding the code documents.</summary>
public LucenePoolLight CodeIndexPool { get; }

/// <summary>Pool holding the hint word documents.</summary>
public LucenePoolLight HintIndexPool { get; }

/// <summary>Logger given at construction.</summary>
public ILog Log { get; }

/// <summary>
/// Makes sure the folders of the code index pool and the hint index pool exist,
/// logging and creating each one that is missing (code folder first).
/// </summary>
public void InitIndexFolderIfNeeded()
{
    EnsureIndexFolder(CodeIndexPool.LuceneIndex);
    EnsureIndexFolder(HintIndexPool.LuceneIndex);

    // Creates the folder only when it is absent, so the log line is written only for real creations.
    void EnsureIndexFolder(string indexFolder)
    {
        if (Directory.Exists(indexFolder))
        {
            return;
        }

        Log.Info($"Create {Name} index folder {indexFolder}");
        Directory.CreateDirectory(indexFolder);
    }
}

/// <summary>
/// Reads the given files, turns each existing file into a code document plus hint words,
/// and writes them to the indexes in batches of <paramref name="batchSize"/> documents.
/// </summary>
/// <param name="fileInfos">Files to index; files that no longer exist are skipped silently.</param>
/// <param name="failedIndexFiles">Receives every file whose processing threw an exception.</param>
/// <param name="needCommit">Forwarded to the batch write (see BuildIndex).</param>
/// <param name="triggerMerge">Forwarded to the batch write (see BuildIndex).</param>
/// <param name="applyAllDeletes">Forwarded to the batch write (see BuildIndex).</param>
/// <param name="cancellationToken">Checked before each file and inside the batch write.</param>
/// <param name="batchSize">
/// Number of code documents that triggers a batch write. Validated by RequireRange with the
/// arguments (int.MaxValue, 50) - presumably an allowed range of 50..int.MaxValue; confirm against the helper.
/// </param>
/// <exception cref="OperationCanceledException">When <paramref name="cancellationToken"/> is cancelled.</exception>
public void BuildIndexByBatch(IEnumerable<FileInfo> fileInfos, out List<FileInfo> failedIndexFiles, bool needCommit, bool triggerMerge, bool applyAllDeletes, CancellationToken cancellationToken, int batchSize = 10000)
{
    fileInfos.RequireNotNull(nameof(fileInfos));
    batchSize.RequireRange(nameof(batchSize), int.MaxValue, 50);

    var codeDocuments = new List<Document>();

    // A set instead of a list: the same word occurs many times within and across files.
    // Keeping duplicates made the buffer grow with the total word count of the batch and
    // sent the same term to the hint index repeatedly, each time replacing an identical document.
    var hintWords = new HashSet<string>();
    failedIndexFiles = new List<FileInfo>();

    foreach (var fileInfo in fileInfos)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            if (fileInfo.Exists)
            {
                var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

                var words = WordSegmenter.GetWords(source.Content).Where(word => word.Length > 3 && word.Length < 200);
                hintWords.UnionWith(words);

                var doc = CodeIndexBuilder.GetDocumentFromSource(source);
                codeDocuments.Add(doc);

                Log.Info($"{Name}: Add index For {source.FilePath}");
            }
        }
        catch (Exception ex)
        {
            // A single unreadable file must not abort the whole run; report it to the caller instead.
            failedIndexFiles.Add(fileInfo);
            Log.Error($"{Name}: Add index for {fileInfo.FullName} failed, exception: " + ex);
        }

        if (codeDocuments.Count >= batchSize)
        {
            FlushBatch();
        }
    }

    // Write whatever is left over from the last, partially filled batch.
    if (codeDocuments.Count > 0)
    {
        FlushBatch();
    }

    // Writes the buffered documents and distinct hint words, then empties both buffers.
    void FlushBatch()
    {
        BuildIndex(needCommit, triggerMerge, applyAllDeletes, codeDocuments, new List<string>(hintWords), cancellationToken);
        codeDocuments.Clear();
        hintWords.Clear();
    }
}

/// <summary>
/// Calls DeleteAllIndex on the code index pool and then on the hint index pool,
/// logging before the first call and after the second.
/// </summary>
public void DeleteAllIndex()
{
Log.Info($"{Name}: Delete All Index start");
CodeIndexPool.DeleteAllIndex();
HintIndexPool.DeleteAllIndex();
Log.Info($"{Name}: Delete All Index finished");
}

/// <summary>
/// Lists every document in the code index as a (file path, last write time) pair.
/// </summary>
/// <returns>
/// A materialized list. The time is built from the stored LastWriteTimeUtc field parsed as a
/// tick count, so its <see cref="DateTime.Kind"/> is Unspecified.
/// </returns>
public IEnumerable<(string FilePath, DateTime LastWriteTimeUtc)> GetAllIndexedCodeSource()
{
    var allDocuments = CodeIndexPool.Search(new MatchAllDocsQuery(), int.MaxValue);

    return allDocuments
        .Select(document =>
        {
            var filePath = document.Get(nameof(CodeSource.FilePath));
            var lastWriteTicks = long.Parse(document.Get(nameof(CodeSource.LastWriteTimeUtc)));

            return (filePath, new DateTime(lastWriteTicks));
        })
        .ToList();
}

/// <summary>
/// Writes one batch: the code documents go to the code index pool, then every word is
/// written to the hint index pool keyed by its Word term.
/// </summary>
/// <param name="needCommit">Forwarded to the code pool; also one of the flags that triggers the hint commit.</param>
/// <param name="triggerMerge">Forwarded to the code pool; also one of the flags that triggers the hint commit.</param>
/// <param name="applyAllDeletes">Forwarded to the code pool; also one of the flags that triggers the hint commit.</param>
/// <param name="codeDocuments">Code documents of this batch.</param>
/// <param name="words">Hint words of this batch.</param>
/// <param name="cancellationToken">Checked on entry and before each hint word.</param>
void BuildIndex(bool needCommit, bool triggerMerge, bool applyAllDeletes, List<Document> codeDocuments, List<string> words, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    Log.Info($"{Name}: Build code index start, documents count {codeDocuments.Count}");
    CodeIndexPool.BuildIndex(codeDocuments, needCommit, triggerMerge, applyAllDeletes);
    Log.Info($"{Name}: Build code index finished");

    Log.Info($"{Name}: Build hint index start, documents count {words.Count}");

    foreach (var hintWord in words)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var wordTerm = new Term(nameof(CodeWord.Word), hintWord);
        var wordDocument = new Document
        {
            new StringField(nameof(CodeWord.Word), hintWord, Field.Store.YES),
            new StringField(nameof(CodeWord.WordLower), hintWord.ToLowerInvariant(), Field.Store.YES)
        };

        HintIndexPool.UpdateIndex(wordTerm, wordDocument);
    }

    // The hint pool is committed explicitly as soon as any of the three flags is set.
    var commitHintIndex = needCommit || triggerMerge || applyAllDeletes;
    if (commitHintIndex)
    {
        HintIndexPool.Commit();
    }

    Log.Info($"{Name}: Build hint index finished");
}

/// <summary>True once <see cref="Dispose"/> has been entered.</summary>
public bool IsDisposing { get; private set; }

/// <summary>
/// Disposes the code index pool and then the hint index pool.
/// Calls made after the first one return immediately.
/// </summary>
public void Dispose()
{
    if (IsDisposing)
    {
        return;
    }

    IsDisposing = true;
    CodeIndexPool.Dispose();
    HintIndexPool.Dispose();
}

/// <summary>
/// Re-indexes a single file: replaces its code document (matched by the non-tokenized
/// FilePath term) and writes each of its hint words to the hint index pool.
/// </summary>
/// <param name="fileInfo">File to index; a file that does not exist is treated as success with no work done.</param>
/// <param name="cancellationToken">Checked before each hint word is written.</param>
/// <returns>True on success (or when the file does not exist); false when an exception other than cancellation occurred.</returns>
/// <exception cref="OperationCanceledException">Logged and rethrown when the operation is cancelled.</exception>
public bool UpdateIndex(FileInfo fileInfo, CancellationToken cancellationToken)
{
    try
    {
        if (!fileInfo.Exists)
        {
            return true;
        }

        var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

        // Materialized up front so segmentation failures surface before the code index is touched.
        var hintWords = WordSegmenter.GetWords(source.Content).Where(word => word.Length > 3 && word.Length < 200).ToList();

        var doc = CodeIndexBuilder.GetDocumentFromSource(source);
        var filePathTerm = GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), source.FilePath);
        CodeIndexPool.UpdateIndex(filePathTerm, doc);

        foreach (var hintWord in hintWords)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var wordTerm = new Term(nameof(CodeWord.Word), hintWord);
            var wordDocument = new Document
            {
                new StringField(nameof(CodeWord.Word), hintWord, Field.Store.YES),
                new StringField(nameof(CodeWord.WordLower), hintWord.ToLowerInvariant(), Field.Store.YES)
            };

            HintIndexPool.UpdateIndex(wordTerm, wordDocument);
        }

        Log.Info($"{Name}: Update index For {source.FilePath} finished");

        return true;
    }
    catch (Exception ex)
    {
        Log.Error($"{Name}: Update index for {fileInfo.FullName} failed, exception: " + ex);

        // Ordinary failures are reported through the return value; cancellation must reach the caller.
        if (!(ex is OperationCanceledException))
        {
            return false;
        }

        throw;
    }
}

/// <summary>
/// Deletes the code index entry matched by the non-tokenized FilePath term for
/// <paramref name="filePath"/>. Only the code index pool is touched here.
/// </summary>
/// <param name="filePath">File path used to build the delete term.</param>
/// <returns>True when the delete call completed; false when it threw (the exception is logged).</returns>
public bool DeleteIndex(string filePath)
{
    try
    {
        var filePathTerm = GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), filePath);
        CodeIndexPool.DeleteIndex(filePathTerm);
        Log.Info($"{Name}: Delete index For {filePath} finished");
    }
    catch (Exception ex)
    {
        Log.Error($"{Name}: Delete index for {filePath} failed, exception: " + ex);

        return false;
    }

    return true;
}

/// <summary>
/// Calls Commit on the code index pool and then on the hint index pool.
/// </summary>
public void Commit()
{
CodeIndexPool.Commit();
HintIndexPool.Commit();
}

/// <summary>
/// Builds a term whose field name is <paramref name="fieldName"/> followed by
/// Constants.NoneTokenizeFieldSuffix.
/// </summary>
/// <param name="fieldName">Base field name, without the suffix.</param>
/// <param name="termValue">Value of the term.</param>
/// <returns>The term for the suffixed field.</returns>
public Term GetNoneTokenizeFieldTerm(string fieldName, string termValue) =>
    new Term($"{fieldName}{Constants.NoneTokenizeFieldSuffix}", termValue);
}
}
28 changes: 28 additions & 0 deletions src/CodeIndex.IndexBuilder/ILucenePool.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;

namespace CodeIndex.IndexBuilder
{
/// <summary>
/// Contract for an index pool. Implementations are not part of this file, so the member
/// notes below restate the signatures only - confirm exact behaviour in the implementing class.
/// </summary>
public interface ILucenePool : IDisposable
{
    /// <summary>Index location of this pool (presumably a folder path - confirm in the implementation).</summary>
    string LuceneIndex { get; }

    /// <summary>Analyzer associated with this pool.</summary>
    Analyzer Analyzer { get; }

    /// <summary>Runs <paramref name="query"/> with an optional <paramref name="filter"/> and returns at most <paramref name="maxResults"/> documents.</summary>
    Document[] Search(Query query, int maxResults, Filter filter = null);

    /// <summary>Adds the given documents; the flags select commit, merge and delete handling.</summary>
    void BuildIndex(IEnumerable<Document> documents, bool needCommit, bool triggerMerge = false, bool applyAllDeletes = false);

    /// <summary>Writes <paramref name="document"/> for the entry identified by <paramref name="term"/>.</summary>
    void UpdateIndex(Term term, Document document);

    /// <summary>Deletes the entries selected by the given queries.</summary>
    void DeleteIndex(params Query[] searchQueries);

    /// <summary>Deletes the entries selected by the given terms.</summary>
    void DeleteIndex(params Term[] terms);

    /// <summary>Deletes every entry in the index.</summary>
    void DeleteAllIndex();
}
}
Loading

0 comments on commit 1a6929b

Please sign in to comment.