-
Notifications
You must be signed in to change notification settings - Fork 0
/
repo2text.js
230 lines (183 loc) · 8.89 KB
/
repo2text.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
const express = require('express');
const path = require('path');
const fs = require('fs-extra');
const app = express();
require('dotenv').config();
const { exec } = require('child_process');
/**
 * GET /repo?repoName=<owner>/<repo>
 *
 * `repoName` may also be given as `https://github.com/<owner>/<repo>[.git]`.
 * The repository is cloned into `<__dirname>/repo/<repo>` (any previous copy
 * is removed first), flattened into one string by `processFiles`, written to
 * `<__dirname>/output/<repo>.txt` and sent back as a file download.
 *
 * Responses:
 *   400 - `repoName` is missing or is not a plain `owner/repo` reference.
 *   500 - cloning or processing failed.
 */
app.get('/repo', async (req, res) => {
  const requested = req.query.repoName;
  if (!requested) {
    return res.status(400).send('Repository URL not specified');
  }

  // The value ends up inside a shell command and inside a path handed to
  // fs.remove(), so only `owner/repo` made of characters that mean nothing to
  // a shell is accepted. Query values can also be arrays/objects, hence the
  // typeof check.
  const slug =
    typeof requested === 'string'
      ? /^(?:https:\/\/github\.com\/)?([\w.-]+)\/([\w.-]+?)(?:\.git)?\/*$/.exec(requested)
      : null;
  // "." / ".." would make the clone directory point outside `repo/`.
  const isDotsOnly = (segment) => /^\.+$/.test(segment);
  if (!slug || isDotsOnly(slug[1]) || isDotsOnly(slug[2])) {
    return res.status(400).send('Invalid repository name');
  }

  const [, owner, repoName] = slug;
  const localPath = path.join(__dirname, 'repo', repoName);
  const token = process.env.GITHUB_TOKEN;

  try {
    // Ensure the directory is clean
    await fs.remove(localPath);

    // Clone the repository (authenticated when a token is configured).
    // The target path is quoted so a project directory with spaces works.
    const credentials = token ? `${token}@` : '';
    const cloneUrl = `https://${credentials}github.com/${owner}/${repoName}.git`;
    await execPromise(`git clone ${cloneUrl} "${localPath}"`);

    // Read files excluding the ignored ones
    const content = await processFiles(localPath);
    const outputFile = path.join(__dirname, 'output', `${repoName}.txt`);
    await fs.outputFile(outputFile, content.join('')); // Join without newlines
    res.download(outputFile, `${repoName}.txt`);
  } catch (error) {
    console.error('Failed to process repository:', error);
    // exec() errors embed the full command line, which contains the token;
    // never echo it back to the client.
    const message = error instanceof Error ? error.message : String(error);
    const safeMessage = token ? message.split(token).join('***') : message;
    res.status(500).send(`Server error: ${safeMessage}`);
  }
});
/**
 * GET /file-history?repoName=<owner>/<repo>&filePath=<path inside the repo>
 *
 * Clones the repository into `<__dirname>/repo/<repo>`, runs `git blame -p`
 * on the file and responds (as plain text) with one line per non-blank source
 * line in the form `|YYYY-MM-DD| <source line>`, where the date is the author
 * date of the commit that last touched that line.
 *
 * Responses:
 *   400 - a parameter is missing, the repository is not a plain `owner/repo`
 *         reference, or the file path leaves the repository / contains
 *         characters that are unsafe inside a quoted shell argument.
 *   500 - cloning or any git command failed.
 */
app.get('/file-history', async (req, res) => {
  const { repoName, filePath } = req.query;
  if (!repoName || !filePath) {
    return res.status(400).send('Repository name or file path not specified');
  }

  // Both values are interpolated into shell commands, and the repository name
  // also decides which directory fs.remove() deletes - validate strictly.
  const slug =
    typeof repoName === 'string'
      ? /^(?:https:\/\/github\.com\/)?([\w.-]+)\/([\w.-]+?)(?:\.git)?\/*$/.exec(repoName)
      : null;
  const isDotsOnly = (segment) => /^\.+$/.test(segment);
  if (!slug || isDotsOnly(slug[1]) || isDotsOnly(slug[2])) {
    return res.status(400).send('Invalid repository name');
  }
  const [, owner, repo] = slug;
  const localPath = path.join(__dirname, 'repo', repo);

  // Normalise to a repo-relative path with forward slashes (git accepts those
  // on every platform).
  const gitFilePath =
    typeof filePath === 'string'
      ? path.relative(localPath, path.join(localPath, filePath)).split(path.sep).join('/')
      : '';
  const leavesRepo = gitFilePath === '' || gitFilePath === '..' || gitFilePath.startsWith('../');
  // Characters that keep a special meaning inside double quotes (sh or cmd.exe)
  // and control characters are rejected instead of escaped.
  if (leavesRepo || /["$`\\%!\x00-\x1f]/.test(gitFilePath)) {
    return res.status(400).send('Invalid file path');
  }

  const token = process.env.GITHUB_TOKEN;
  const toIsoDate = (unixSeconds) => new Date(unixSeconds * 1000).toISOString().substring(0, 10);

  try {
    await fs.remove(localPath); // Ensure the directory is clean
    const credentials = token ? `${token}@` : '';
    await execPromise(`git clone https://${credentials}github.com/${owner}/${repo}.git "${localPath}"`);

    const gitOptions = { cwd: localPath };
    const blameOutput = await execPromise(`git blame -p -- "${gitFilePath}"`, gitOptions);

    // Date of the oldest commit touching the file; only used as a fallback
    // when a blame entry carries no usable author-time.
    const firstCommitOutput = await execPromise(
      `git log --format=%at --reverse -- "${gitFilePath}"`,
      gitOptions
    );
    const firstCommitSeconds = Number.parseInt(firstCommitOutput.trim(), 10);
    const fallbackDate = Number.isNaN(firstCommitSeconds) ? 'unknown' : toIsoDate(firstCommitSeconds);

    // Porcelain output: header lines for an entry, then the source line
    // prefixed with a tab. The first header line starts with the commit sha.
    // `author-time` is only printed the first time a commit appears, so it is
    // remembered per sha and reused for later lines of the same commit.
    const authorTimeBySha = new Map();
    const lineHistory = [];
    let header = [];
    for (const line of blameOutput.split('\n')) {
      if (!line.startsWith('\t')) {
        header.push(line); // Accumulate header lines for the current entry
        continue;
      }

      const sha = header.length > 0 ? header[0].split(' ')[0] : '';
      const authorTimeLine = header.find((entry) => entry.startsWith('author-time '));
      if (authorTimeLine) {
        authorTimeBySha.set(sha, Number.parseInt(authorTimeLine.split(' ')[1], 10));
      }
      header = []; // Reset for the next entry

      // Ignore blank lines only; one-character lines such as "}" are kept.
      if (line.trim().length === 0) {
        continue;
      }
      const unixSeconds = authorTimeBySha.get(sha);
      const date = unixSeconds ? toIsoDate(unixSeconds) : fallbackDate;
      lineHistory.push(`|${date}| ${line.substring(1)}`); // Skip the tab character
    }

    // Source code of an arbitrary repository must not be rendered as HTML.
    res.type('text/plain').send(lineHistory.join('\n'));
  } catch (error) {
    console.error('Failed to process file history:', error);
    // exec() errors embed the command line, which contains the token.
    const message = error instanceof Error ? error.message : String(error);
    const safeMessage = token ? message.split(token).join('***') : message;
    res.status(500).send(`Server error: ${safeMessage}`);
  }
});
const { Tiktoken } = require("tiktoken/lite");
const cl100k_base = require("tiktoken/encoders/cl100k_base.json");
/**
 * GET /latest-updates?repoName=<owner>/<repo>
 *
 * Responds (as plain text) with the `git log -p` output of the last month,
 * excluding `package-lock.json`, trimmed to at most 8000 cl100k_base tokens
 * by dropping the oldest commits first. If even the newest commit alone is
 * over the limit the response is empty.
 *
 * A clone younger than five minutes (by directory mtime) is reused;
 * otherwise the repository is cloned again.
 *
 * Responses:
 *   400 - `repoName` is missing or is not a plain `owner/repo` reference.
 *   500 - cloning, `git log` or tokenisation failed.
 */
app.get('/latest-updates', async (req, res) => {
  const { repoName } = req.query;
  if (!repoName) {
    return res.status(400).send('Repository name not specified');
  }

  // The name is interpolated into a shell command and into a path passed to
  // fs.remove(), so only a plain `owner/repo` reference is accepted.
  const slug =
    typeof repoName === 'string'
      ? /^(?:https:\/\/github\.com\/)?([\w.-]+)\/([\w.-]+?)(?:\.git)?\/*$/.exec(repoName)
      : null;
  const isDotsOnly = (segment) => /^\.+$/.test(segment);
  if (!slug || isDotsOnly(slug[1]) || isDotsOnly(slug[2])) {
    return res.status(400).send('Invalid repository name');
  }
  const [, owner, repo] = slug;

  // The clone is reused between requests, so the directory must identify the
  // owner as well: otherwise `alice/app` would be served for `bob/app`.
  const localPath = path.join(__dirname, 'repo', `${owner}__${repo}`);
  const token = process.env.GITHUB_TOKEN;
  const CACHE_TTL_MS = 5 * 60 * 1000;
  const TOKEN_LIMIT = 8000;

  try {
    let hasFreshClone = false;
    if (await fs.pathExists(localPath)) {
      const { mtime } = await fs.stat(localPath);
      if (Date.now() - mtime.getTime() < CACHE_TTL_MS) {
        hasFreshClone = true;
      } else {
        await fs.remove(localPath); // Clean up old directory if more than 5 minutes old
      }
    }
    if (!hasFreshClone) {
      const credentials = token ? `${token}@` : '';
      await execPromise(`git clone https://${credentials}github.com/${owner}/${repo}.git "${localPath}"`);
    }

    const oneMonthAgo = new Date();
    oneMonthAgo.setMonth(oneMonthAgo.getMonth() - 1);
    const logCommand = `git log -p --since="${oneMonthAgo.toISOString()}" -- . ":!package-lock.json"`;
    const logOutput = await execPromise(logCommand, { cwd: localPath });

    // Split the log into commits and count tokens per commit. The split is
    // anchored to the start of a line so text inside a diff or commit message
    // cannot start a new "commit". The encoder holds native memory, so it is
    // released even when encoding throws.
    const encoding = new Tiktoken(
      cl100k_base.bpe_ranks,
      cl100k_base.special_tokens,
      cl100k_base.pat_str
    );
    let commits;
    try {
      commits = logOutput
        .split(/^(?=commit [a-f0-9]{40}\b)/m)
        .map((text) => ({ text, tokenCount: encoding.encode(text).length }));
    } finally {
      encoding.free();
    }

    // `git log` lists newest first, so pop() drops the oldest commit.
    let totalTokens = commits.reduce((sum, commit) => sum + commit.tokenCount, 0);
    while (totalTokens > TOKEN_LIMIT && commits.length > 0) {
      totalTokens -= commits.pop().tokenCount;
    }

    // Cleanup text: collapse runs of spaces/tabs and drop blank lines, but
    // keep the line breaks that give a diff its structure.
    const finalText = commits
      .map((commit) => commit.text)
      .join('\n')
      .replace(/[^\S\n]{2,}/g, ' ')
      .replace(/\n\s*\n/g, '\n');

    // Diff content of an arbitrary repository must not be rendered as HTML.
    res.type('text/plain').send(finalText);
  } catch (error) {
    console.error('Failed to process latest commits:', error);
    // exec() errors embed the command line, which contains the token.
    const message = error instanceof Error ? error.message : String(error);
    const safeMessage = token ? message.split(token).join('***') : message;
    res.status(500).send(`Server error: ${safeMessage}`);
  }
});
/**
 * Runs a shell command via `child_process.exec` and returns its output.
 *
 * @param {string} command - Command line to execute in a shell.
 * @param {object} [options] - Options forwarded to `exec` (e.g. `cwd`).
 *   `options.env` is merged on top of `process.env`; `GIT_PAGER` is always
 *   forced to `cat` so git never waits on a pager. `maxBuffer` defaults to
 *   64 MiB unless the caller supplies its own value.
 * @returns {Promise<string>} stdout, or stderr when stdout is empty.
 *   Rejects with the error reported by `exec` when the command fails.
 */
function execPromise(command, options = {}) {
  // exec()'s built-in output cap is far too small for `git log -p` on an
  // active repository and would reject with "maxBuffer length exceeded".
  const defaultMaxBuffer = 64 * 1024 * 1024;
  const callerEnv = options ? options.env : undefined;
  const execOptions = {
    maxBuffer: defaultMaxBuffer,
    ...options,
    // Merge instead of replace so variables passed by the caller survive.
    env: { ...process.env, ...callerEnv, GIT_PAGER: 'cat' },
  };

  return new Promise((resolve, reject) => {
    exec(command, execOptions, (error, stdout, stderr) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(stdout || stderr);
    });
  });
}
// Entry names (not paths) that processFiles skips. `*` matches any run of
// characters; every other character is literal. `LICENSE.*` keeps variants
// such as LICENSE.md excluded now that matching is exact rather than substring.
const PROCESS_FILES_IGNORED_NAMES = [
  'LICENSE', 'LICENSE.*', 'package-lock.json', 'yarn.lock', 'node_modules', '.DS_Store',
  '.env', '.env.*', '.git', '.gitignore', 'build', 'dist', 'coverage', '.vscode', '.idea',
  '*.log', '*.tgz', 'firebase.json', '.firebaserc', 'firestore.rules', 'firestore.indexes.json',
].map((glob) => {
  const escaped = glob.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`^${escaped.replace(/\*/g, '.*')}$`);
});

// Extensions whose content is never inlined into the output.
const PROCESS_FILES_MEDIA_EXTENSIONS = new Set([
  '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.mp4', '.mp3',
  '.svg', '.ico', '.woff', '.woff2', '.ttf', '.eot', '.webp',
]);

/**
 * Recursively flattens a directory tree into a list of text fragments.
 *
 * For each entry that is not ignored:
 *   - directory: `Directory: <name>` followed by the fragments of its content
 *   - media file: `File: <name> (media file, content not displayed)`
 *   - other file: `File: <name> --- <content on a single line> ---`
 *
 * Ignore patterns are matched against the entry name only, so the location
 * of the tree on disk (e.g. a parent folder called `build`) and names that
 * merely contain a pattern (e.g. `rebuild.js`) do not affect the result.
 * Symbolic links are skipped: the tree usually comes from an untrusted
 * clone, and following links could read files outside it or loop forever.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Fragments in directory-listing order.
 */
async function processFiles(dir) {
  let results = [];
  const entries = await fs.readdir(dir);

  for (const entry of entries) {
    // Check the name first so ignored entries are never touched on disk.
    if (PROCESS_FILES_IGNORED_NAMES.some((pattern) => pattern.test(entry))) {
      continue;
    }

    const entryPath = path.join(dir, entry);
    const stat = await fs.lstat(entryPath); // lstat: do not follow symlinks

    if (stat.isDirectory()) {
      results.push(`Directory: ${entry}`);
      results = results.concat(await processFiles(entryPath));
    } else if (stat.isFile()) {
      const extension = path.extname(entry).toLowerCase();
      if (PROCESS_FILES_MEDIA_EXTENSIONS.has(extension)) {
        results.push(`File: ${entry} (media file, content not displayed)`);
      } else {
        const content = await fs.readFile(entryPath, 'utf8');
        // Remove all newlines and replace multiple spaces with a single space
        const compressedContent = content.replace(/(\r\n|\n|\r)+/gm, ' ').replace(/\s\s+/g, ' ');
        results.push(`File: ${entry} --- ${compressedContent} ---`);
      }
    }
    // Anything else (symlinks, sockets, ...) is intentionally skipped.
  }

  return results;
}
// Start the HTTP server. The port is taken from the PORT environment variable
// (the .env file is loaded via dotenv at the top of this file) and falls back
// to 3000 when PORT is unset or not a number.
const serverPort = Number.parseInt(process.env.PORT, 10) || 3000;
app.listen(serverPort, () => {
  console.log(`Server started on port ${serverPort}`);
});