Skip to content

Commit

Permalink
Downloader as tarball
Browse files Browse the repository at this point in the history
  • Loading branch information
brianreicher committed Nov 19, 2023
1 parent f37e523 commit bc3a4a3
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 28 deletions.
1 change: 1 addition & 0 deletions ingestion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ serde_json = "1.0"
futures = "0.3.28"
reqwest = "0.11"
zip = "0.6.6"
octocrab = "0.32.0"
71 changes: 43 additions & 28 deletions ingestion/src/downloader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::path::Path;
use mongo_utils::MongoDriver;
use mongodb::bson::doc;
use zip::read::ZipArchive;

use octocrab;
use crate::mongo_utils;


Expand All @@ -24,33 +24,6 @@ impl GitHubDownloader {
}
}

pub async fn download_git_zips(
&self,
urls: Vec<&str>,
zip_dirs: Vec<&str>,
) -> Result<(), Box<dyn Error>> {
for (index, url) in urls.iter().enumerate() {
let response = self.client.get(url.clone()).send().await?;

if response.status() != reqwest::StatusCode::OK {
eprintln!("Error downloading {}: {:?}", url, response.status());
continue;
}
let filename: &str = url.split('/').last().unwrap_or("unknown.zip");
let file_path: std::path::PathBuf = Path::new(zip_dirs[index]).join(filename);

let mut response_body = response.bytes().await?;
while let chunk = response_body.chunks(8) {
let chunk = chunk;
file_path.write_all(&chunk)?; // TODO: write successfully
}

println!("Downloaded: {}", file_path.display());
}

Ok(())
}

pub async fn mongo_insert(&self, zip_dir: Vec<&str>, filter_suffix: Vec<&str>) -> mongodb::error::Result<()> {
let file = File::open(zip_dir)?;
let reader = std::io::BufReader::new(file);
Expand Down Expand Up @@ -81,6 +54,48 @@ impl GitHubDownloader {
self.mongo_model.insert_document(self.collection.as_str(), document).await?;
}

Ok(())
}
pub async fn download_git_tarballs(
&self,
organization: &str,
token: &str,
output_dir: &str,
) -> Result<(), Box<dyn Error>> {
let repos = octocrab::instance()
.repos(organization)
.list()
.per_page(100)
.token(token)
.send()
.await?;

for repo in repos {
let repo_name = repo.name.unwrap_or_default();
let tarball_url = format!(
"https://api.github.com/repos/{}/{}/tarball",
organization, repo_name
);

let response = self.client.get(&tarball_url).header("Authorization", format!("Bearer {}", token)).send().await?;

if response.status() != reqwest::StatusCode::OK {
eprintln!("Error downloading {}: {:?}", tarball_url, response.status());
continue;
}

let output_path = Path::new(output_dir).join(format!("{}.tar.gz", repo_name));
let mut output_file = File::create(&output_path)?;

let mut response_body = response.bytes().await?;
while let chunk = response_body.chunk() {
let chunk = chunk?;
output_file.write_all(&chunk)?;
}

println!("Downloaded: {}", output_path.display());
}

Ok(())
}
}

0 comments on commit bc3a4a3

Please sign in to comment.