-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create light Lucene maintainer and builder utilities
* commit * fix build failed * commit * create lucene pool light * style fix * commit * commit * undo * 123 * commit * commit * commit * commit
- Loading branch information
Showing
24 changed files
with
1,344 additions
and
142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,78 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
|
||
namespace CodeIndex.Common | ||
{ | ||
/// <summary>
/// Settings describing a single code index. The three list-like settings
/// (<see cref="ExcludedPaths"/>, <see cref="IncludedExtensions"/>, <see cref="ExcludedExtensions"/>)
/// are stored as one <see cref="SplitChar"/>-delimited string each and exposed as lazily
/// built arrays through the matching <c>*Array</c> properties.
/// </summary>
public class IndexConfig
{
    /// <summary>Delimiter used to split the delimited string properties into arrays.</summary>
    public const char SplitChar = '|';

    public Guid Pk { get; set; }
    public string IndexName { get; set; }
    public string MonitorFolder { get; set; }
    public int MaxContentHighlightLength { get; set; }
    public int SaveIntervalSeconds { get; set; }
    public string OpenIDEUriFormat { get; set; }
    public string MonitorFolderRealPath { get; set; }
    public DateTime IndexCreatedDate { get; set; }
    public DateTime IndexLastUpdatedDate { get; set; }

    // The properties below are declared exactly once, as delimited strings. Declaring them a
    // second time as IEnumerable<string> is a duplicate-member compile error (CS0102) and
    // cannot be passed to GetSplitStringArray(string).

    /// <summary>
    /// <see cref="SplitChar"/>-delimited list of excluded paths.
    /// Assigning a value discards the cached <see cref="ExcludedPathsArray"/>.
    /// </summary>
    public string ExcludedPaths
    {
        get => excludedPaths;
        set
        {
            excludedPaths = value;
            excludedPathsArray = null; // force a re-split on next array access
        }
    }

    /// <summary>
    /// <see cref="SplitChar"/>-delimited list of included extensions.
    /// Assigning a value discards the cached <see cref="IncludedExtensionsArray"/>.
    /// </summary>
    public string IncludedExtensions
    {
        get => includedExtensions;
        set
        {
            includedExtensions = value;
            includedExtensionsArray = null; // force a re-split on next array access
        }
    }

    /// <summary>
    /// <see cref="SplitChar"/>-delimited list of excluded extensions.
    /// Assigning a value discards the cached <see cref="ExcludedExtensionsArray"/>.
    /// </summary>
    public string ExcludedExtensions
    {
        get => excludedExtensions;
        set
        {
            excludedExtensions = value;
            excludedExtensionsArray = null; // force a re-split on next array access
        }
    }

    /// <summary><see cref="ExcludedPaths"/> split on <see cref="SplitChar"/>; built on first access and cached.</summary>
    public string[] ExcludedPathsArray => excludedPathsArray ??= GetSplitStringArray(ExcludedPaths);

    /// <summary><see cref="IncludedExtensions"/> split on <see cref="SplitChar"/>; built on first access and cached.</summary>
    public string[] IncludedExtensionsArray => includedExtensionsArray ??= GetSplitStringArray(IncludedExtensions);

    /// <summary><see cref="ExcludedExtensions"/> split on <see cref="SplitChar"/>; built on first access and cached.</summary>
    public string[] ExcludedExtensionsArray => excludedExtensionsArray ??= GetSplitStringArray(ExcludedExtensions);

    /// <summary>
    /// Builds the two index folder paths for this configuration by combining
    /// <paramref name="parentFolder"/>, <see cref="IndexName"/> and the folder names defined on
    /// <c>CodeIndexConfiguration</c>.
    /// </summary>
    /// <param name="parentFolder">Folder under which the per-index folders live.</param>
    /// <returns>The code index folder path and the hint index folder path.</returns>
    public (string CodeIndexFolder, string HintIndexFolder) GetFolders(string parentFolder)
    {
        var codeIndexFolder = Path.Combine(parentFolder, IndexName, CodeIndexConfiguration.CodeIndexFolder);
        var hintIndexFolder = Path.Combine(parentFolder, IndexName, CodeIndexConfiguration.HintIndexFolder);

        return (codeIndexFolder, hintIndexFolder);
    }

    /// <summary>
    /// Splits <paramref name="value"/> on <see cref="SplitChar"/>, dropping empty entries.
    /// Returns an empty array when <paramref name="value"/> is null or empty.
    /// </summary>
    string[] GetSplitStringArray(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return Array.Empty<string>();
        }

        return value.Split(SplitChar, StringSplitOptions.RemoveEmptyEntries);
    }

    // Cached split results; reset to null by the corresponding string setter.
    string[] excludedPathsArray;
    string[] includedExtensionsArray;
    string[] excludedExtensionsArray;

    // Backing fields for the delimited string properties.
    string excludedPaths;
    string includedExtensions;
    string excludedExtensions;
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,9 +2,11 @@ | |
{ | ||
/// <summary>
/// Status values for an index.
/// </summary>
/// <remarks>
/// Members carry no explicit values, so they are numbered from zero in declaration order;
/// inserting or reordering members changes the underlying integer of every later member.
/// </remarks>
public enum IndexStatus
{
    Created,
    Idle,
    Initializing,
    Monitoring,
    Error,
    Deleting,
    Initialized
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Threading; | ||
using CodeIndex.Common; | ||
using CodeIndex.Files; | ||
using Lucene.Net.Documents; | ||
using Lucene.Net.Index; | ||
using Lucene.Net.Search; | ||
|
||
namespace CodeIndex.IndexBuilder | ||
{ | ||
/// <summary>
/// Maintains two Lucene pools for one named index: <see cref="CodeIndexPool"/>, which receives one
/// document per source file, and <see cref="HintIndexPool"/>, which receives one small document per
/// word extracted from the file contents.
/// </summary>
public class CodeIndexBuilderLight : IDisposable
{
    /// <summary>
    /// Creates a builder over the two supplied pools.
    /// </summary>
    /// <param name="name">Name used as a prefix in log messages; must not be null or empty.</param>
    /// <param name="codeIndexPool">Pool that stores the per-file documents; must not be null.</param>
    /// <param name="hintIndexPool">Pool that stores the per-word hint documents; must not be null.</param>
    /// <param name="log">Logger; must not be null.</param>
    public CodeIndexBuilderLight(string name, LucenePoolLight codeIndexPool, LucenePoolLight hintIndexPool, ILog log)
    {
        name.RequireNotNullOrEmpty(nameof(name));
        codeIndexPool.RequireNotNull(nameof(codeIndexPool));
        hintIndexPool.RequireNotNull(nameof(hintIndexPool));
        log.RequireNotNull(nameof(log));

        Name = name;
        CodeIndexPool = codeIndexPool;
        HintIndexPool = hintIndexPool;
        Log = log;
    }

    public string Name { get; }
    public LucenePoolLight CodeIndexPool { get; }
    public LucenePoolLight HintIndexPool { get; }
    public ILog Log { get; }

    /// <summary>True once <see cref="Dispose"/> has been entered.</summary>
    public bool IsDisposing { get; private set; }

    /// <summary>
    /// Creates the folder of each pool (its <c>LuceneIndex</c> path) when it does not exist yet,
    /// logging each creation.
    /// </summary>
    public void InitIndexFolderIfNeeded()
    {
        EnsureIndexFolder(CodeIndexPool.LuceneIndex);
        EnsureIndexFolder(HintIndexPool.LuceneIndex);
    }

    /// <summary>
    /// Indexes the given files in batches. Each existing file is read, turned into a code document
    /// and segmented into hint words; once <paramref name="batchSize"/> code documents have been
    /// collected they are written to both pools, and any remainder is written after the last file.
    /// </summary>
    /// <param name="fileInfos">Files to index; must not be null. Files that do not exist are skipped.</param>
    /// <param name="failedIndexFiles">Receives the files whose processing threw; each failure is logged.</param>
    /// <param name="needCommit">Forwarded to the code pool; also one of the flags that triggers a hint pool commit.</param>
    /// <param name="triggerMerge">Forwarded to the code pool; also one of the flags that triggers a hint pool commit.</param>
    /// <param name="applyAllDeletes">Forwarded to the code pool; also one of the flags that triggers a hint pool commit.</param>
    /// <param name="cancellationToken">Checked before every file, before every batch write and before every hint word write.</param>
    /// <param name="batchSize">
    /// Number of code documents per batch. Validated through <c>RequireRange</c> with
    /// <c>int.MaxValue</c> and <c>50</c> (presumably the upper and lower bounds — confirm against the helper).
    /// </param>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="cancellationToken"/> is cancelled.</exception>
    public void BuildIndexByBatch(IEnumerable<FileInfo> fileInfos, out List<FileInfo> failedIndexFiles, bool needCommit, bool triggerMerge, bool applyAllDeletes, CancellationToken cancellationToken, int batchSize = 10000)
    {
        fileInfos.RequireNotNull(nameof(fileInfos));
        batchSize.RequireRange(nameof(batchSize), int.MaxValue, 50);

        var codeDocuments = new List<Document>();

        // A set rather than a list: the same word occurs many times across a batch of source
        // files, and every occurrence would otherwise cause an identical hint index update.
        var hintWords = new HashSet<string>();
        failedIndexFiles = new List<FileInfo>();

        foreach (var fileInfo in fileInfos)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                if (fileInfo.Exists)
                {
                    var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

                    hintWords.UnionWith(GetHintWords(source.Content));
                    codeDocuments.Add(CodeIndexBuilder.GetDocumentFromSource(source));

                    Log.Info($"{Name}: Add index For {source.FilePath}");
                }
            }
            catch (Exception ex)
            {
                // Best effort: one unreadable file must not abort the whole batch.
                failedIndexFiles.Add(fileInfo);
                Log.Error($"{Name}: Add index for {fileInfo.FullName} failed, exception: " + ex);
            }

            if (codeDocuments.Count >= batchSize)
            {
                BuildIndex(needCommit, triggerMerge, applyAllDeletes, codeDocuments, hintWords, cancellationToken);
                codeDocuments.Clear();
                hintWords.Clear();
            }
        }

        if (codeDocuments.Count > 0)
        {
            BuildIndex(needCommit, triggerMerge, applyAllDeletes, codeDocuments, hintWords, cancellationToken);
        }
    }

    /// <summary>Calls <c>DeleteAllIndex</c> on both pools, logging start and finish.</summary>
    public void DeleteAllIndex()
    {
        Log.Info($"{Name}: Delete All Index start");
        CodeIndexPool.DeleteAllIndex();
        HintIndexPool.DeleteAllIndex();
        Log.Info($"{Name}: Delete All Index finished");
    }

    /// <summary>
    /// Reads every document of the code pool and returns its stored file path together with its
    /// stored last-write time, which is parsed from the stored string as a tick count.
    /// </summary>
    /// <returns>A materialized list of (file path, last write time) pairs.</returns>
    public IEnumerable<(string FilePath, DateTime LastWriteTimeUtc)> GetAllIndexedCodeSource()
    {
        return CodeIndexPool.Search(new MatchAllDocsQuery(), int.MaxValue).Select(u => (u.Get(nameof(CodeSource.FilePath)), new DateTime(long.Parse(u.Get(nameof(CodeSource.LastWriteTimeUtc)))))).ToList();
    }

    /// <summary>
    /// Re-indexes a single file: replaces its code document (matched on the non-tokenized file path
    /// term) and writes a hint document for each distinct word of its content.
    /// </summary>
    /// <param name="fileInfo">File to re-index. When it does not exist nothing is written.</param>
    /// <param name="cancellationToken">Checked before each hint word is written.</param>
    /// <returns>
    /// <c>true</c> when no exception occurred (including the case where the file does not exist);
    /// <c>false</c> when an exception other than cancellation was caught and logged.
    /// </returns>
    /// <exception cref="OperationCanceledException">Logged and rethrown when the token is cancelled.</exception>
    public bool UpdateIndex(FileInfo fileInfo, CancellationToken cancellationToken)
    {
        try
        {
            if (fileInfo.Exists)
            {
                var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

                // Distinct: repeated words would only trigger identical hint index updates.
                var words = GetHintWords(source.Content).Distinct().ToList();
                var doc = CodeIndexBuilder.GetDocumentFromSource(source);

                CodeIndexPool.UpdateIndex(GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), source.FilePath), doc);
                UpdateHintWords(words, cancellationToken);

                Log.Info($"{Name}: Update index For {source.FilePath} finished");
            }

            return true;
        }
        catch (Exception ex)
        {
            Log.Error($"{Name}: Update index for {fileInfo.FullName} failed, exception: " + ex);

            // Cancellation must reach the caller; every other failure is reported via the return value.
            if (ex is OperationCanceledException)
            {
                throw;
            }

            return false;
        }
    }

    /// <summary>
    /// Deletes the code document whose non-tokenized file path term equals <paramref name="filePath"/>.
    /// The hint pool is not touched.
    /// </summary>
    /// <returns><c>true</c> on success; <c>false</c> when an exception was caught and logged.</returns>
    public bool DeleteIndex(string filePath)
    {
        try
        {
            CodeIndexPool.DeleteIndex(GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), filePath));
            Log.Info($"{Name}: Delete index For {filePath} finished");

            return true;
        }
        catch (Exception ex)
        {
            Log.Error($"{Name}: Delete index for {filePath} failed, exception: " + ex);
            return false;
        }
    }

    /// <summary>Commits the code pool, then the hint pool.</summary>
    public void Commit()
    {
        CodeIndexPool.Commit();
        HintIndexPool.Commit();
    }

    /// <summary>
    /// Builds a term on the field named <paramref name="fieldName"/> followed by
    /// <c>Constants.NoneTokenizeFieldSuffix</c>.
    /// </summary>
    public Term GetNoneTokenizeFieldTerm(string fieldName, string termValue)
    {
        return new Term($"{fieldName}{Constants.NoneTokenizeFieldSuffix}", termValue);
    }

    /// <summary>Disposes both pools the first time it is called; later calls do nothing.</summary>
    public void Dispose()
    {
        if (!IsDisposing)
        {
            IsDisposing = true;
            CodeIndexPool.Dispose();
            HintIndexPool.Dispose();
        }
    }

    /// <summary>Creates <paramref name="folder"/> (with a log entry) when it does not exist.</summary>
    void EnsureIndexFolder(string folder)
    {
        if (!Directory.Exists(folder))
        {
            Log.Info($"Create {Name} index folder {folder}");
            Directory.CreateDirectory(folder);
        }
    }

    /// <summary>Segments <paramref name="content"/> and keeps the words longer than 3 and shorter than 200 characters.</summary>
    static IEnumerable<string> GetHintWords(string content)
    {
        return WordSegmenter.GetWords(content).Where(word => word.Length > 3 && word.Length < 200);
    }

    /// <summary>
    /// Writes one batch: the code documents go to the code pool, then every word goes to the hint
    /// pool, which is committed when any of the three flags is set.
    /// </summary>
    void BuildIndex(bool needCommit, bool triggerMerge, bool applyAllDeletes, List<Document> codeDocuments, ICollection<string> words, CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        Log.Info($"{Name}: Build code index start, documents count {codeDocuments.Count}");
        CodeIndexPool.BuildIndex(codeDocuments, needCommit, triggerMerge, applyAllDeletes);
        Log.Info($"{Name}: Build code index finished");

        Log.Info($"{Name}: Build hint index start, documents count {words.Count}");
        UpdateHintWords(words, cancellationToken);

        if (needCommit || triggerMerge || applyAllDeletes)
        {
            HintIndexPool.Commit();
        }

        Log.Info($"{Name}: Build hint index finished");
    }

    /// <summary>
    /// Calls <c>HintIndexPool.UpdateIndex</c> once per word, keyed on the exact word, with a document
    /// storing the word and its invariant lower-case form. Cancellation is checked before each word.
    /// </summary>
    void UpdateHintWords(IEnumerable<string> words, CancellationToken cancellationToken)
    {
        foreach (var word in words)
        {
            cancellationToken.ThrowIfCancellationRequested();

            HintIndexPool.UpdateIndex(new Term(nameof(CodeWord.Word), word), new Document
            {
                new StringField(nameof(CodeWord.Word), word, Field.Store.YES),
                new StringField(nameof(CodeWord.WordLower), word.ToLowerInvariant(), Field.Store.YES)
            });
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Lucene.Net.Analysis; | ||
using Lucene.Net.Documents; | ||
using Lucene.Net.Index; | ||
using Lucene.Net.Search; | ||
|
||
namespace CodeIndex.IndexBuilder | ||
{ | ||
/// <summary>
/// Operations exposed by a Lucene index pool.
/// </summary>
/// <remarks>
/// NOTE(review): no implementation is visible next to this interface, so the member summaries
/// only restate what the signatures show — confirm exact semantics against the implementing class.
/// </remarks>
public interface ILucenePool : IDisposable
{
    /// <summary>String identifying the Lucene index (presumably its folder path — confirm).</summary>
    string LuceneIndex { get; }

    /// <summary>Analyzer associated with this pool.</summary>
    Analyzer Analyzer { get; }

    /// <summary>Runs <paramref name="query"/>, optionally restricted by <paramref name="filter"/>.</summary>
    /// <param name="query">Query to execute.</param>
    /// <param name="maxResults">Result limit passed by the caller.</param>
    /// <param name="filter">Optional filter; defaults to <c>null</c>.</param>
    /// <returns>The matching documents as an array.</returns>
    Document[] Search(Query query, int maxResults, Filter filter = null);

    /// <summary>Submits <paramref name="documents"/> to the index.</summary>
    /// <param name="documents">Documents to submit.</param>
    /// <param name="needCommit">Commit flag supplied by the caller.</param>
    /// <param name="triggerMerge">Merge flag; defaults to <c>false</c>.</param>
    /// <param name="applyAllDeletes">Apply-deletes flag; defaults to <c>false</c>.</param>
    void BuildIndex(
        IEnumerable<Document> documents,
        bool needCommit,
        bool triggerMerge = false,
        bool applyAllDeletes = false);

    /// <summary>Update operation taking a <paramref name="term"/> and a <paramref name="document"/>.</summary>
    void UpdateIndex(Term term, Document document);

    /// <summary>Delete operation driven by one or more terms.</summary>
    void DeleteIndex(params Term[] terms);

    /// <summary>Delete operation driven by one or more queries.</summary>
    void DeleteIndex(params Query[] searchQueries);

    /// <summary>Delete operation taking no arguments (by name, removes everything — confirm).</summary>
    void DeleteAllIndex();
}
} |
Oops, something went wrong.