Skip to content

Commit

Permalink
fixed issue #440
Browse files Browse the repository at this point in the history
  • Loading branch information
simsong committed Jan 15, 2024
1 parent de09e37 commit d1beb8a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/be20_api
Submodule be20_api updated 1 files
+11 −7 scanner_set.cpp
8 changes: 7 additions & 1 deletion src/phase1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,12 +282,18 @@ void Phase1::dfxml_write_source()
void Phase1::phase1_run()
{
assert(ss.get_current_phase() == scanner_params::PHASE_SCAN);
// save all of the pages we have seen in the DFXML file

// when restarting, re-record in the DFXML file every page that a previous run had already seen
for (const auto &it : config.seen_page_ids) {
ss.record_work_start_pos0str( it );
}

// now start the new run
xreport.push("runtime","xmlns:debug=\"http://www.github.com/simsong/bulk_extractor/issues\"");

// process all of the sbufs
read_process_sbufs();

if (!config.opt_quiet) cout << "All data read; waiting for threads to finish..." << std::endl;
ss.join();
xreport.pop("runtime");
Expand Down
25 changes: 14 additions & 11 deletions src/test_be3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ TEST_CASE("e2e-H", "[end-to-end]") {

/* Run on the first 100k of the emails dataset
* bulk_extractor -0q -o [outdir] nps-2010-emails.100k.raw
* Runs twice, so that we can also test the restarting logic
*/
TEST_CASE("e2e-0", "[end-to-end]") {
std::filesystem::path inpath = test_dir() / "nps-2010-emails.100k.raw";
Expand All @@ -131,27 +132,29 @@ TEST_CASE("e2e-0", "[end-to-end]") {
std::cerr << "STDOUT:" << std::endl << cout.str() << std::endl << std::endl << "STDERR:" << std::endl << cerr.str() << std::endl;
REQUIRE( ret==0 );
}
// https://stackoverflow.com/questions/20731/how-do-you-clear-a-stringstream-variable
std::stringstream().swap(cout);
std::stringstream().swap(cerr);

ret = run_be(cout, cerr, argv);
if (ret!=0) {
std::cerr << "STDOUT:" << std::endl << cout.str() << std::endl << std::endl
<< "STDERR:" << std::endl << cerr.str() << std::endl;
REQUIRE( ret==0 );
}

/* make sure that there are both debug:work_start and debug:work_stop tags in the output */
auto xml_file = outdir_string + "/report.xml";
grep( "debug:work_start", xml_file);
printf("************** CALLING CHECK FOR work_stop ********\n");
grep( "debug:work_stop", xml_file);

/* Check that the DFXML report is well-formed XML (xmllint --noout parses it without validating against a schema) */
std::string validate = std::string("xmllint --noout ") + xml_file;
int code = system( validate.c_str());
REQUIRE( code==0 );

// This is the second time through - clear cout and cerr first
// https://stackoverflow.com/questions/20731/how-do-you-clear-a-stringstream-variable
std::stringstream().swap(cout);
std::stringstream().swap(cerr);

// Re-run to make sure that works
ret = run_be(cout, cerr, argv);
if (ret!=0) {
std::cerr << "STDOUT:" << std::endl << cout.str() << std::endl << std::endl
<< "STDERR:" << std::endl << cerr.str() << std::endl;
REQUIRE( ret==0 );
}
}

/*
Expand Down

0 comments on commit d1beb8a

Please sign in to comment.