Skip to content

Commit

Permalink
Adding support for parsing amazon cloudfront log files in log importe…
Browse files Browse the repository at this point in the history
…r & making W3cExtendedFormat treat the time-taken field as seconds by default (since that is what the W3C format specifies). Also add support for event tracking and CloudFront RTMP logs (which provide event logs). Also allow multiple spaces between fields in W3C extended log files. Allow derived classes of W3cExtendedFormat to define new custom field regexes.
  • Loading branch information
diosmosis committed Dec 15, 2014
1 parent 980676f commit 04f64ac
Show file tree
Hide file tree
Showing 71 changed files with 1,256 additions and 370 deletions.
20 changes: 15 additions & 5 deletions misc/log-analytics/import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,6 @@ def check_format(self, file):
regex = '\S+'
full_regex.append(regex)
full_regex = '\s+'.join(full_regex)
logging.debug(full_regex)
logging.debug(first_line)
self.regex = re.compile(full_regex)

file.seek(0)
Expand Down Expand Up @@ -1392,8 +1390,15 @@ def _get_hit_args(self, hit):
)

if hit.generation_time_milli > 0:
args['gt_ms'] = hit.generation_time_milli
logging.debug(args)
args['gt_ms'] = int(hit.generation_time_milli)

if hit.event_category and hit.event_action:
args['e_c'] = hit.event_category
args['e_a'] = hit.event_action

if hit.event_name:
args['e_n'] = hit.event_name

return args

def _record_hits(self, hits):
Expand Down Expand Up @@ -1583,7 +1588,7 @@ def check_format(lineOrFile):
else:
match = candidate_format.check_format(lineOrFile)
except Exception, e:
logging.debug(str(e))
logging.debug('Error in format checking: %s', str(e))
pass

if match:
Expand Down Expand Up @@ -1790,9 +1795,14 @@ def invalid_line(line, reason):

# add event info
try:
hit.event_category = hit.event_action = hit.event_name = None

hit.event_category = format.get('event_category')
hit.event_action = format.get('event_action')

hit.event_name = format.get('event_name')
if hit.event_name == '-':
hit.event_name = None
except:
pass

Expand Down
2 changes: 1 addition & 1 deletion misc/log-analytics/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def test_iis_custom_format():
import_logs.config.options.enable_http_redirects = True
import_logs.config.options.enable_http_errors = True
import_logs.config.options.replay_tracking = False
# import_logs.config.options.w3c_time_taken_in_millisecs = True test that even w/o this, we get the right format
# import_logs.config.options.w3c_time_taken_in_millisecs = True test that even w/o this, we get the right values
import_logs.parser.parse(file_)

hits = [hit.__dict__ for hit in Recorder.recorders]
Expand Down
30 changes: 30 additions & 0 deletions tests/PHPUnit/Fixtures/ManySitesImportedLogs.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class ManySitesImportedLogs extends Fixture
public $addSegments = false;
public $includeIisWithCustom = false;
public $includeNetscaler = false;
public $includeCloudfront = false;
public $includeCloudfrontRtmp = false;

public static function createAccessInstance()
{
Expand Down Expand Up @@ -121,6 +123,14 @@ private function trackVisits()
if ($this->includeNetscaler) {
$this->logNetscaler();
}

if ($this->includeCloudfront) {
$this->logCloudfront();
}

if ($this->includeCloudfrontRtmp) {
$this->logCloudfrontRtmp();
}
}

private function setupSegments()
Expand Down Expand Up @@ -264,4 +274,24 @@ private function logNetscaler()

return self::executeLogImporter($logFile, $opts);
}

/**
 * Runs the log importer against the fake CloudFront access log fixture,
 * passing this fixture's site ID and the token auth as importer options.
 *
 * @return mixed whatever self::executeLogImporter() returns
 */
private function logCloudfront()
{
    $importerOptions = array(
        '--idsite'     => $this->idSite,
        '--token-auth' => self::getTokenAuth(),
    );

    $cloudfrontLog = PIWIK_INCLUDE_PATH . '/tests/resources/access-logs/fake_logs_cloudfront.log';

    return self::executeLogImporter($cloudfrontLog, $importerOptions);
}

/**
 * Runs the log importer against the fake CloudFront RTMP access log fixture,
 * passing this fixture's site ID and the token auth as importer options.
 *
 * @return mixed whatever self::executeLogImporter() returns
 */
private function logCloudfrontRtmp()
{
    $importerOptions = array(
        '--idsite'     => $this->idSite,
        '--token-auth' => self::getTokenAuth(),
    );

    $rtmpLog = PIWIK_INCLUDE_PATH . '/tests/resources/access-logs/fake_logs_cloudfront_rtmp.log';

    return self::executeLogImporter($rtmpLog, $importerOptions);
}
}
2 changes: 0 additions & 2 deletions tests/PHPUnit/Framework/Fixture.php
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,6 @@ protected static function executeLogImporter($logFile, $options)

// run the command
exec($cmd, $output, $result);
echo "$cmd - \n";
echo implode("\n", $output)."\n";
if ($result !== 0) {
throw new Exception("log importer failed: " . implode("\n", $output) . "\n\ncommand used: $cmd");
}
Expand Down
5 changes: 4 additions & 1 deletion tests/PHPUnit/System/ImportLogsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*/
class ImportLogsTest extends SystemTestCase
{
/** @var ManySitesImportedLogs */
public static $fixture = null; // initialized below class definition

/**
Expand Down Expand Up @@ -104,4 +105,6 @@ public static function getOutputPrefix()

ImportLogsTest::$fixture = new ManySitesImportedLogs();
ImportLogsTest::$fixture->includeIisWithCustom = true;
ImportLogsTest::$fixture->includeNetscaler = true;
ImportLogsTest::$fixture->includeNetscaler = true;
ImportLogsTest::$fixture->includeCloudfront = true;
ImportLogsTest::$fixture->includeCloudfrontRtmp = true;
Original file line number Diff line number Diff line change
Expand Up @@ -1129,4 +1129,66 @@
</row>
</subtable>
</row>
<row>
<label>view</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<subtable>
<row>
<label>my</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<subtable>
<row>
<label>/file.html</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<url>http://piwik.net/view/my/file.html</url>
</row>
</subtable>
</row>
</subtable>
</row>
</result>
Original file line number Diff line number Diff line change
Expand Up @@ -1470,4 +1470,66 @@
</row>
</subtable>
</row>
<row>
<label>view</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<subtable>
<row>
<label>my</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<subtable>
<row>
<label>/file.html</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<url>http://piwik.net/view/my/file.html</url>
</row>
</subtable>
</row>
</subtable>
</row>
</result>
Original file line number Diff line number Diff line change
Expand Up @@ -1113,4 +1113,66 @@
</row>
</subtable>
</row>
<row>
<label>view</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<subtable>
<row>
<label>my</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<subtable>
<row>
<label>/file.html</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<nb_hits_with_time_generation>1</nb_hits_with_time_generation>
<min_time_generation>0.001</min_time_generation>
<max_time_generation>0.001</max_time_generation>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<avg_time_generation>0.001</avg_time_generation>
<url>http://piwik.net/view/my/file.html</url>
</row>
</subtable>
</row>
</subtable>
</row>
</result>
Loading

0 comments on commit 04f64ac

Please sign in to comment.