Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a more granular diff format for autoedits model training #6173

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions vscode/src/autoedits/prompt-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ line 64
</area_around_code_to_rewrite>
Now, continue where I left off and finish my change by rewriting "code_to_rewrite":
`
expect(prompt.toString()).toEqual(expectedPrompt)
Expand Down
69 changes: 57 additions & 12 deletions vscode/src/autoedits/prompt-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ interface CurrentFileContext {
range: vscode.Range
}

/**
 * The two prompt fragments produced from the recent-edits context items.
 *
 * They are kept separate so the caller can place them at different positions
 * in the final prompt: `longTermDiff` is emitted after the current-file
 * section, while `shortTermDiff` is emitted right before the final user
 * instruction.
 */
interface RecentEditPromptComponents {
/**
 * Built from every recent-edit item except the last one in the (reversed)
 * list, prefixed with the recent-edits instruction. Empty when fewer than
 * two recent edits are available.
 */
longTermDiff: PromptString
/**
 * Built from only the last recent-edit item in the (reversed) list.
 * Empty when there are no recent edits.
 * NOTE(review): presumably this is the most recent edit — confirm the
 * retriever's ordering.
 */
shortTermDiff: PromptString
}

// Helper function to get prompt in some format
export function getBaseUserPrompt(
docContext: DocumentContext,
Expand Down Expand Up @@ -107,10 +112,8 @@ export function getBaseUserPrompt(
getRecentlyViewedSnippetsPrompt
)

const recentEditsPrompt = getPromptForTheContextSource(
contextItemMapping.get(RetrieverIdentifier.RecentEditsRetriever) || [],
RECENT_EDITS_INSTRUCTION,
getRecentEditsPrompt
const recentEditsPromptComponents = getRecentEditsPromptComponents(
contextItemMapping.get(RetrieverIdentifier.RecentEditsRetriever) || []
)

const lintErrorsPrompt = getPromptForTheContextSource(
Expand All @@ -134,10 +137,11 @@ export function getBaseUserPrompt(
${jaccardSimilarityPrompt}
${recentViewsPrompt}
${CURRENT_FILE_INSTRUCTION}${fileWithMarkerPrompt}
${recentEditsPrompt}
${recentEditsPromptComponents.longTermDiff}
${lintErrorsPrompt}
${recentCopyPrompt}
${areaPrompt}
${recentEditsPromptComponents.shortTermDiff}
${FINAL_USER_PROMPT}
`
autoeditsLogger.logDebug('AutoEdits', 'Prompt\n', finalPrompt)
Expand Down Expand Up @@ -323,24 +327,61 @@ ${RECENT_COPY_TAG_CLOSE}
`
}

export function getRecentEditsPrompt(contextItems: AutocompleteContextSnippet[]): PromptString {
export function getRecentEditsPromptComponents(
contextItems: AutocompleteContextSnippet[]
): RecentEditPromptComponents {
const recentEdits = getContextItemsForIdentifier(
contextItems,
RetrieverIdentifier.RecentEditsRetriever
)
recentEdits.reverse()
if (recentEdits.length === 0) {
let shortTermDiff: PromptString = ps``
let longTermDiff: PromptString = ps``
if (recentEdits.length > 0) {
shortTermDiff = getRecentEditPrompt([recentEdits.at(-1)!])
}
if (recentEdits.length > 1) {
const longTermDiffPrompt = getRecentEditPromptLongTermDiffComponent(recentEdits.slice(0, -1))
longTermDiff = ps`${RECENT_EDITS_INSTRUCTION}
${longTermDiffPrompt}
`
}
return {
shortTermDiff,
longTermDiff,
}
}

function getRecentEditPromptLongTermDiffComponent(context: AutocompleteContextSnippet[]): PromptString {
if (context.length === 0) {
return ps``
}
const recentEditsPrompts = recentEdits.map(item =>
getContextPromptWithPath(
const prompts = context.map(item =>
getContextPromptForDiffWithPath(
PromptString.fromDisplayPath(item.uri),
PromptString.fromAutocompleteContextSnippet(item).content
)
)
const recentEditsPrompt = PromptString.join(recentEditsPrompts, ps`\n`)
return ps`${RECENT_EDITS_TAG_OPEN}
${recentEditsPrompt}
return ps`
${RECENT_EDITS_TAG_OPEN}
${PromptString.join(prompts, ps`\n`)}
${RECENT_EDITS_TAG_CLOSE}
`
}

/**
 * Renders the given recent-edit snippets as one prompt section wrapped in the
 * recent-edits open/close tags.
 *
 * Each snippet is formatted as its display path followed by its content, and
 * the formatted snippets are separated by a newline.
 *
 * @param contextItems Snippets to render, in the order they should appear.
 * @returns The tagged section, or an empty prompt when there is nothing to render.
 */
function getRecentEditPrompt(contextItems: AutocompleteContextSnippet[]): PromptString {
    // Nothing to show: return an empty prompt instead of an empty tag pair.
    if (contextItems.length === 0) {
        return ps``
    }

    const diffEntries: PromptString[] = []
    for (const snippet of contextItems) {
        const displayPath = PromptString.fromDisplayPath(snippet.uri)
        const diffContent = PromptString.fromAutocompleteContextSnippet(snippet).content
        diffEntries.push(getContextPromptForDiffWithPath(displayPath, diffContent))
    }
    const joinedEntries = PromptString.join(diffEntries, ps`\n`)

    return ps`
${RECENT_EDITS_TAG_OPEN}
${joinedEntries}
${RECENT_EDITS_TAG_CLOSE}
`
}
Expand Down Expand Up @@ -455,3 +496,7 @@ function getContextItemsForIdentifier(
/**
 * Formats a context snippet as its backtick-quoted path in parentheses,
 * followed by a blank line, the snippet content, and a trailing newline.
 *
 * @param filePath Path to show in the header line.
 * @param content Snippet text placed under the header.
 * @returns The combined prompt fragment.
 */
function getContextPromptWithPath(filePath: PromptString, content: PromptString): PromptString {
    const quotedPath = ps`(\`${filePath}\`)`
    return ps`${quotedPath}\n\n${content}\n`
}

/**
 * Formats a diff snippet as the bare file path on the first line and the diff
 * content directly below it, with no quoting and no trailing newline.
 *
 * @param filePath Path the diff applies to.
 * @param content Diff text for that path.
 * @returns The combined prompt fragment.
 */
function getContextPromptForDiffWithPath(filePath: PromptString, content: PromptString): PromptString {
    const lineBreak = ps`\n`
    return ps`${filePath}${lineBreak}${content}`
}
1 change: 1 addition & 0 deletions vscode/src/completions/analytics-logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,7 @@ function suggestionDocumentDiffTracker(
const documentText = document.getText(trackingRange)

const persistenceTimeoutList = [
10 * 1000, // 10 seconds
20 * 1000, // 20 seconds
60 * 1000, // 60 seconds
120 * 1000, // 120 seconds
Expand Down
14 changes: 9 additions & 5 deletions vscode/src/completions/context/context-data-logging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ import type { RetrievedContextResults } from './completions-context-ranker'
import { JaccardSimilarityRetriever } from './retrievers/jaccard-similarity/jaccard-similarity-retriever'
import { DiagnosticsRetriever } from './retrievers/recent-user-actions/diagnostics-retriever'
import { RecentCopyRetriever } from './retrievers/recent-user-actions/recent-copy'
import { RecentEditsRetrieverDiffStrategyIdentifier } from './retrievers/recent-user-actions/recent-edits-diff-helpers/recent-edits-diff-strategy'
import { LineLevelDiffStrategy } from './retrievers/recent-user-actions/recent-edits-diff-helpers/line-level-diff'
import { TwoStageUnifiedDiffStrategy } from './retrievers/recent-user-actions/recent-edits-diff-helpers/two-stage-unified-diff'
import { RecentEditsRetriever } from './retrievers/recent-user-actions/recent-edits-retriever'
import { RecentViewPortRetriever } from './retrievers/recent-user-actions/recent-view-port'
import { RetrieverIdentifier } from './utils'

interface RetrieverConfig {
identifier: RetrieverIdentifier
maxSnippets: number
maxSnippets?: number
}

export class ContextRetrieverDataCollection implements vscode.Disposable {
Expand All @@ -31,7 +32,7 @@ export class ContextRetrieverDataCollection implements vscode.Disposable {
private gitMetadataInstance = GitHubDotComRepoMetadata.getInstance()

private readonly retrieverConfigs: RetrieverConfig[] = [
{ identifier: RetrieverIdentifier.RecentEditsRetriever, maxSnippets: 15 },
{ identifier: RetrieverIdentifier.RecentEditsRetriever },
{ identifier: RetrieverIdentifier.DiagnosticsRetriever, maxSnippets: 15 },
{ identifier: RetrieverIdentifier.RecentViewPortRetriever, maxSnippets: 10 },
]
Expand Down Expand Up @@ -105,8 +106,11 @@ export class ContextRetrieverDataCollection implements vscode.Disposable {
case RetrieverIdentifier.RecentEditsRetriever:
return new RecentEditsRetriever({
maxAgeMs: 10 * 60 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiffWithLineNumbers,
diffStrategyList: [
new TwoStageUnifiedDiffStrategy(),
new LineLevelDiffStrategy({ shouldGroupNonOverlappingLines: true }),
new LineLevelDiffStrategy({ shouldGroupNonOverlappingLines: false }),
],
})
case RetrieverIdentifier.DiagnosticsRetriever:
return new DiagnosticsRetriever({
Expand Down
26 changes: 15 additions & 11 deletions vscode/src/completions/context/context-strategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import { JaccardSimilarityRetriever } from './retrievers/jaccard-similarity/jacc
import { LspLightRetriever } from './retrievers/lsp-light/lsp-light-retriever'
import { DiagnosticsRetriever } from './retrievers/recent-user-actions/diagnostics-retriever'
import { RecentCopyRetriever } from './retrievers/recent-user-actions/recent-copy'
import { RecentEditsRetrieverDiffStrategyIdentifier } from './retrievers/recent-user-actions/recent-edits-diff-helpers/recent-edits-diff-strategy'
import { TwoStageUnifiedDiffStrategy } from './retrievers/recent-user-actions/recent-edits-diff-helpers/two-stage-unified-diff'
import { UnifiedDiffStrategy } from './retrievers/recent-user-actions/recent-edits-diff-helpers/unified-diff'
import { RecentEditsRetriever } from './retrievers/recent-user-actions/recent-edits-retriever'
import { RecentViewPortRetriever } from './retrievers/recent-user-actions/recent-view-port'
import { loadTscRetriever } from './retrievers/tsc/load-tsc-retriever'
Expand Down Expand Up @@ -55,35 +56,39 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
this.allLocalRetrievers = [
new RecentEditsRetriever({
maxAgeMs: 60 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiff,
diffStrategyList: [
new UnifiedDiffStrategy({ addLineNumbers: false }),
],
}),
]
break
case 'recent-edits-1m':
this.allLocalRetrievers = [
new RecentEditsRetriever({
maxAgeMs: 60 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiff,
diffStrategyList: [
new UnifiedDiffStrategy({ addLineNumbers: false }),
],
}),
]
break
case 'recent-edits-5m':
this.allLocalRetrievers = [
new RecentEditsRetriever({
maxAgeMs: 60 * 5 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiff,
diffStrategyList: [
new UnifiedDiffStrategy({ addLineNumbers: false }),
],
}),
]
break
case 'recent-edits-mixed':
this.allLocalRetrievers = [
new RecentEditsRetriever({
maxAgeMs: 60 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiff,
diffStrategyList: [
new UnifiedDiffStrategy({ addLineNumbers: false }),
],
}),
new JaccardSimilarityRetriever(),
]
Expand Down Expand Up @@ -127,8 +132,7 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
this.allLocalRetrievers = [
new RecentEditsRetriever({
maxAgeMs: 10 * 60 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiffWithLineNumbers,
diffStrategyList: [new TwoStageUnifiedDiffStrategy()],
}),
new DiagnosticsRetriever({
contextLines: 0,
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import type { PromptString } from '@sourcegraph/cody-shared'
import type * as vscode from 'vscode'

/**
 * Contract for a strategy that turns the recorded changes of one document
 * into a list of diff hunks.
 */
export interface RecentEditsRetrieverDiffStrategy {
/** Computes the diff hunks for the given document and its recorded changes. */
getDiffHunks(input: DiffCalculationInput): DiffHunk[]
/**
 * Returns a name identifying this strategy.
 * NOTE(review): presumably used for logging or labelling — confirm against callers.
 */
getDiffStrategyName(): string
}

/** A single content change of a text document, together with bookkeeping data. */
export interface TextDocumentChange {
// NOTE(review): presumably epoch milliseconds of when the change was recorded — confirm.
timestamp: number
// The VS Code content-change event describing the edit.
change: vscode.TextDocumentContentChangeEvent
// The range in the document where the text was inserted.
insertedRange: vscode.Range
}

/** Input handed to a diff strategy's `getDiffHunks`. */
export interface DiffCalculationInput {
// The document the changes belong to.
uri: vscode.Uri
// NOTE(review): presumably the document text before `changes` were applied — confirm.
oldContent: string
// The recorded changes for this document.
// NOTE(review): ordering (oldest-first vs newest-first) is not visible here — confirm.
changes: TextDocumentChange[]
}

/** One diff produced by a diff strategy for a single document. */
export interface DiffHunk {
// The document this hunk applies to.
uri: vscode.Uri
// NOTE(review): presumably the timestamp of the newest change included in this hunk — confirm.
latestEditTimestamp: number
// The diff text, as a PromptString.
diff: PromptString
}

/**
 * A diff for one document together with the document's new content.
 * NOTE(review): the name suggests a unified-diff patch computation result — confirm at the producer.
 */
export interface UnifiedPatchResponse {
// The document the patch applies to.
uri: vscode.Uri
// NOTE(review): presumably the document text after the changes were applied — confirm.
newContent: string
// The diff text, as a PromptString.
diff: PromptString
// NOTE(review): presumably the timestamp of the newest change reflected in `diff` — confirm.
latestEditTimestamp: number
}
Loading
Loading