Skip to content

Commit

Permalink
Refs matomo-org#5980, parse cs-username variable in IIS logs and use …
Browse files Browse the repository at this point in the history
…as userid when present and not anonymous. Includes changes to ImportLogsTest.
  • Loading branch information
diosmosis committed Dec 8, 2014
1 parent fe24bcc commit 3d3ec54
Show file tree
Hide file tree
Showing 63 changed files with 1,129 additions and 972 deletions.
16 changes: 14 additions & 2 deletions misc/log-analytics/import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,12 @@ class IisFormat(RegexFormat):
'cs-uri-stem': '(?P<path>/\S*)',
'cs-uri-query': '(?P<query_string>\S*)',
'c-ip': '"?(?P<ip>[\d*.]*)"?',
'cs(User-Agent)': '(?P<user_agent>".*?"|\S+)', # TODO: also benchmark regex.
'cs(User-Agent)': '(?P<user_agent>".*?"|\S+)',
'cs(Referer)': '(?P<referrer>\S+)',
'sc-status': '(?P<status>\d+)',
'sc-bytes': '(?P<length>\S+)',
'cs-host': '(?P<host>\S+)',
'cs-username': '(?P<userid>\S+)'
}

def __init__(self):
Expand Down Expand Up @@ -1285,8 +1286,9 @@ def _get_hit_args(self, hit):
'cdt': self.date_to_piwik(hit.date),
'idsite': site_id,
'dp': '0' if config.options.reverse_dns else '1',
'ua': hit.user_agent.encode('utf8'),
'ua': hit.user_agent.encode('utf8')
}

if config.options.replay_tracking:
# prevent request to be force recorded when option replay-tracking
args['rec'] = '0'
Expand Down Expand Up @@ -1695,6 +1697,16 @@ def invalid_line(line, reason):
# Some formats have no host.
pass

# Add userid
try:
hit.userid = None

userid = format.get('userid')
if userid != '-':
hit.args['uid'] = userid
except:
pass

# Check if the hit must be excluded.
if not all((method(hit) for method in self.check_methods)):
continue
Expand Down
3 changes: 2 additions & 1 deletion misc/log-analytics/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ def check_iis_groups(groups):
assert groups['host'] == 'example.com'

expected_hit_properties = ['date', 'path', 'query_string', 'ip', 'referrer', 'user_agent',
'status', 'length', 'host']
'status', 'length', 'host', 'userid']

for property_name in groups.keys():
assert property_name in expected_hit_properties

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
<nb_visits>2</nb_visits>
<nb_hits>2</nb_hits>
<sum_time_spent>180</sum_time_spent>
<entry_nb_visits>2</entry_nb_visits>
<entry_nb_actions>3</entry_nb_actions>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>2</entry_nb_actions>
<entry_sum_visit_length>182</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<entry_bounce_count>0</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>90</avg_time_on_page>
<bounce_rate>50%</bounce_rate>
<bounce_rate>0%</bounce_rate>
<exit_rate>50%</exit_rate>
<subtable>
<row>
Expand All @@ -29,23 +29,6 @@
<bounce_rate>0%</bounce_rate>
<exit_rate>0%</exit_rate>
</row>
<row>
<label> URL = http://piwik.net/Topic/hw43061</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
</row>
</subtable>
</row>
<row>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@
<nb_hits_with_time_generation>0</nb_hits_with_time_generation>
<min_time_generation />
<max_time_generation>0</max_time_generation>
<entry_nb_visits>2</entry_nb_visits>
<entry_nb_actions>3</entry_nb_actions>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>2</entry_nb_actions>
<entry_sum_visit_length>182</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<entry_bounce_count>0</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>90</avg_time_on_page>
<bounce_rate>50%</bounce_rate>
<bounce_rate>0%</bounce_rate>
<exit_rate>50%</exit_rate>
<avg_time_generation>0</avg_time_generation>
<subtable>
Expand All @@ -54,23 +54,6 @@
<bounce_rate>0%</bounce_rate>
<exit_rate>0%</exit_rate>
</row>
<row>
<label> URL = http://piwik.net/Topic/hw43061</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
</row>
</subtable>
</row>
<row>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,122 @@
</row>
</subtable>
</row>
<row>
<label>hello</label>
<nb_visits>2</nb_visits>
<nb_hits>2</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>2</entry_nb_visits>
<entry_nb_actions>2</entry_nb_actions>
<entry_sum_visit_length>1</entry_sum_visit_length>
<entry_bounce_count>2</entry_bounce_count>
<exit_nb_visits>2</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>from</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>1</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>another</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>1</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>world</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>1</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>/6,681965</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>1</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<url>http://piwik.net/hello/from/another/world/6,681965</url>
</row>
</subtable>
</row>
</subtable>
</row>
</subtable>
</row>
<row>
<label>world</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>/6,681965</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<url>http://piwik.net/hello/world/6,681965</url>
</row>
</subtable>
</row>
</subtable>
</row>
<row>
<label>/index.htm</label>
<nb_visits>1</nb_visits>
Expand Down Expand Up @@ -588,56 +704,6 @@
</row>
</subtable>
</row>
<row>
<label>hello</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>world</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>/6,681965</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<url>http://piwik.net/hello/world/6,681965</url>
</row>
</subtable>
</row>
</subtable>
</row>
<row>
<label>hosting</label>
<nb_visits>1</nb_visits>
Expand Down Expand Up @@ -746,30 +812,27 @@
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<entry_nb_actions>2</entry_nb_actions>
<entry_sum_visit_length>2</entry_sum_visit_length>
<entry_bounce_count>0</entry_bounce_count>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<bounce_rate>0%</bounce_rate>
<exit_rate>0%</exit_rate>
<subtable>
<row>
<label>/theProduct</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<entry_nb_actions>2</entry_nb_actions>
<entry_sum_visit_length>2</entry_sum_visit_length>
<entry_bounce_count>0</entry_bounce_count>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<bounce_rate>0%</bounce_rate>
<exit_rate>0%</exit_rate>
<url>http://piwik.net/Products/theProduct</url>
</row>
</subtable>
Expand Down Expand Up @@ -936,40 +999,6 @@
</row>
</subtable>
</row>
<row>
<label>Topic</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<subtable>
<row>
<label>/hw43061</label>
<nb_visits>1</nb_visits>
<nb_hits>1</nb_hits>
<sum_time_spent>0</sum_time_spent>
<entry_nb_visits>1</entry_nb_visits>
<entry_nb_actions>1</entry_nb_actions>
<entry_sum_visit_length>0</entry_sum_visit_length>
<entry_bounce_count>1</entry_bounce_count>
<exit_nb_visits>1</exit_nb_visits>
<sum_daily_nb_uniq_visitors>1</sum_daily_nb_uniq_visitors>
<sum_daily_entry_nb_uniq_visitors>1</sum_daily_entry_nb_uniq_visitors>
<sum_daily_exit_nb_uniq_visitors>1</sum_daily_exit_nb_uniq_visitors>
<avg_time_on_page>0</avg_time_on_page>
<bounce_rate>100%</bounce_rate>
<exit_rate>100%</exit_rate>
<url>http://piwik.net/Topic/hw43061</url>
</row>
</subtable>
</row>
<row>
<label>translations</label>
<nb_visits>1</nb_visits>
Expand Down
Loading

0 comments on commit 3d3ec54

Please sign in to comment.