From 5c14cf8e5bff649230f91ef7e90dc3d3dd89f9a9 Mon Sep 17 00:00:00 2001 From: mattab Date: Fri, 5 Dec 2014 19:31:25 +1300 Subject: [PATCH] move logic that creates the SQL query into LogQueryBuilder class --- core/DataAccess/LogQueryBuilder.php | 261 ++++++++++++++++++++++++++++ core/Segment.php | 249 ++------------------------ core/Segment/SegmentExpression.php | 5 + 3 files changed, 284 insertions(+), 231 deletions(-) create mode 100644 core/DataAccess/LogQueryBuilder.php diff --git a/core/DataAccess/LogQueryBuilder.php b/core/DataAccess/LogQueryBuilder.php new file mode 100644 index 00000000000..254334ca8c5 --- /dev/null +++ b/core/DataAccess/LogQueryBuilder.php @@ -0,0 +1,261 @@ +isEmpty()) { + $segmentExpression->parseSubExpressionsIntoSqlExpressions($from); + + $joins = $this->generateJoins($from); + $from = $joins['sql']; + $joinWithSubSelect = $joins['joinWithSubSelect']; + + $segmentSql = $segmentExpression->getSql(); + $segmentWhere = $segmentSql['where']; + if (!empty($segmentWhere)) { + if (!empty($where)) { + $where = "( $where ) + AND + ($segmentWhere)"; + } else { + $where = $segmentWhere; + } + } + + $bind = array_merge($bind, $segmentSql['bind']); + } else { + $joins = $this->generateJoins($from); + $from = $joins['sql']; + $joinWithSubSelect = $joins['joinWithSubSelect']; + } + + if ($joinWithSubSelect) { + $sql = $this->buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy); + } else { + $sql = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy); + } + return array( + 'sql' => $sql, + 'bind' => $bind + ); + } + /** + * Generate the join sql based on the needed tables + * @param array $tables tables to join + * @throws Exception if tables can't be joined + * @return array + */ + private function generateJoins($tables) + { + $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item"); + $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false; + $joinWithSubSelect = false; + $sql = ''; + + // make sure the tables are joined in the right order + // base table first, then action before conversion + // this way, conversions can be joined on idlink_va + $actionIndex = array_search("log_link_visit_action", $tables); + $conversionIndex = array_search("log_conversion", $tables); + if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) { + $tables[$actionIndex] = "log_conversion"; + $tables[$conversionIndex] = "log_link_visit_action"; + } + + // same as above: action before visit + $actionIndex = array_search("log_link_visit_action", $tables); + $visitIndex = array_search("log_visit", $tables); + if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) { + $tables[$actionIndex] = "log_visit"; + $tables[$visitIndex] = "log_link_visit_action"; + } + + foreach ($tables as $i => $table) { + if (is_array($table)) { + // join condition provided + $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table']; + $sql .= " + LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias + . " ON " . $table['joinOn']; + continue; + } + + if (!in_array($table, $knownTables)) { + throw new Exception("Table '$table' can't be used for segmentation"); + } + + $tableSql = Common::prefixTable($table) . " AS $table"; + + if ($i == 0) { + // first table + $sql .= $tableSql; + } else { + if ($actionsAvailable && $table == "log_conversion") { + // have actions, need conversions => join on idlink_va + $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va " + . "AND log_conversion.idsite = log_link_visit_action.idsite"; + } else if ($actionsAvailable && $table == "log_visit") { + // have actions, need visits => join on idvisit + $join = "log_visit.idvisit = log_link_visit_action.idvisit"; + } else if ($visitsAvailable && $table == "log_link_visit_action") { + // have visits, need actions => we have to use a more complex join + // we don't hande this here, we just return joinWithSubSelect=true in this case + $joinWithSubSelect = true; + $join = "log_link_visit_action.idvisit = log_visit.idvisit"; + } else if ($conversionsAvailable && $table == "log_link_visit_action") { + // have conversions, need actions => join on idlink_va + $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va"; + } else if (($visitsAvailable && $table == "log_conversion") + || ($conversionsAvailable && $table == "log_visit") + ) { + // have visits, need conversion (or vice versa) => join on idvisit + // notice that joining conversions on visits has lower priority than joining it on actions + $join = "log_conversion.idvisit = log_visit.idvisit"; + + // if conversions are joined on visits, we need a complex join + if ($table == "log_conversion") { + $joinWithSubSelect = true; + } + } elseif ($conversionItemAvailable && $table === 'log_visit') { + $join = "log_conversion_item.idvisit = log_visit.idvisit"; + } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') { + $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit"; + } elseif ($conversionItemAvailable && $table === 'log_conversion') { + $join = "log_conversion_item.idvisit = log_conversion.idvisit"; + } else { + throw new Exception("Table '$table' can't be joined for segmentation"); + } + + // the join sql the default way + $sql .= " + LEFT JOIN $tableSql ON $join"; + } + + // remember which tables are available + $visitsAvailable = ($visitsAvailable || $table == "log_visit"); + $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action"); + $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion"); + $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item"); + } + + $return = array( + 'sql' => $sql, + 'joinWithSubSelect' => $joinWithSubSelect + ); + return $return; + + } + + /** + * Build select query the normal way + * @param string $select fieldlist to be selected + * @param string $from tablelist to select from + * @param string $where where clause + * @param string $orderBy order by clause + * @param string $groupBy group by clause + * @return string + */ + private function buildSelectQuery($select, $from, $where, $orderBy, $groupBy) + { + $sql = " + SELECT + $select + FROM + $from"; + + if ($where) { + $sql .= " + WHERE + $where"; + } + + if ($groupBy) { + $sql .= " + GROUP BY + $groupBy"; + } + + if ($orderBy) { + $sql .= " + ORDER BY + $orderBy"; + } + + return $sql; + } + + /** + * Build a select query where actions have to be joined on visits (or conversions) + * In this case, the query gets wrapped in another query so that grouping by visit is possible + * @param string $select + * @param string $from + * @param string $where + * @param string $orderBy + * @param string $groupBy + * @throws Exception + * @return string + */ + private function buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy) + { + $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)"; + preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches); + $neededFields = array_unique($matches[0]); + + if (count($neededFields) == 0) { + throw new Exception("No needed fields found in select expression. " + . "Please use a table prefix."); + } + + $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select); + $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy); + $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy); + + $from = "( + SELECT + " . implode(", + ", $neededFields) . " + FROM + $from + WHERE + $where + GROUP BY log_visit.idvisit + ) AS log_inner"; + + $where = false; + $query = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy); + return $query; + } + +} \ No newline at end of file diff --git a/core/Segment.php b/core/Segment.php index d990ed16ffd..5229caf3c59 100644 --- a/core/Segment.php +++ b/core/Segment.php @@ -9,6 +9,7 @@ namespace Piwik; use Exception; +use Piwik\DataAccess\LogQueryBuilder; use Piwik\Plugins\API\API; use Piwik\Segment\SegmentExpression; @@ -60,6 +61,16 @@ class Segment */ protected $segment = null; + /** + * @var string + */ + protected $string = null; + + /** + * @var array + */ + protected $idSites = null; + /** * Truncate the Segments to 8k */ @@ -69,9 +80,8 @@ class Segment * Constructor. * * @param string $segmentCondition The segment condition, eg, `'browserCode=ff;countryCode=CA'`. - * @param array $idSites The list of sites the segment will be used with. Some segments are - * dependent on the site, such as goal segments. - * @throws Exception + * @param array $idSites The list of sites the st + * @throws */ public function __construct($segmentCondition, $idSites) { @@ -128,7 +138,7 @@ protected function initializeSegment($string, $idSites) */ public function isEmpty() { - return empty($this->string); + return $this->segment->isEmpty(); } protected $availableSegments = array(); @@ -227,234 +237,10 @@ public function getHash() */ public function getSelectQuery($select, $from, $where = false, $bind = array(), $orderBy = false, $groupBy = false) { - if (!is_array($from)) { - $from = array($from); - } - - if (!$this->isEmpty()) { - $this->segment->parseSubExpressionsIntoSqlExpressions($from); - - $joins = $this->generateJoins($from); - $from = $joins['sql']; - $joinWithSubSelect = $joins['joinWithSubSelect']; - - $segmentSql = $this->segment->getSql(); - $segmentWhere = $segmentSql['where']; - if (!empty($segmentWhere)) { - if (!empty($where)) { - $where = "( $where ) - AND - ($segmentWhere)"; - } else { - $where = $segmentWhere; - } - } - - $bind = array_merge($bind, $segmentSql['bind']); - } else { - $joins = $this->generateJoins($from); - $from = $joins['sql']; - $joinWithSubSelect = $joins['joinWithSubSelect']; - } - - if ($joinWithSubSelect) { - $sql = $this->buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy); - } else { - $sql = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy); - } - return array( - 'sql' => $sql, - 'bind' => $bind - ); + $segmentExpression = $this->segment; + $segmentQuery = new LogQueryBuilder(); + return $segmentQuery->getSelectQueryString($segmentExpression, $select, $from, $where, $bind, $orderBy, $groupBy); } - - /** - * Generate the join sql based on the needed tables - * @param array $tables tables to join - * @throws Exception if tables can't be joined - * @return array - */ - private function generateJoins($tables) - { - $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item"); - $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false; - $joinWithSubSelect = false; - $sql = ''; - - // make sure the tables are joined in the right order - // base table first, then action before conversion - // this way, conversions can be joined on idlink_va - $actionIndex = array_search("log_link_visit_action", $tables); - $conversionIndex = array_search("log_conversion", $tables); - if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) { - $tables[$actionIndex] = "log_conversion"; - $tables[$conversionIndex] = "log_link_visit_action"; - } - - // same as above: action before visit - $actionIndex = array_search("log_link_visit_action", $tables); - $visitIndex = array_search("log_visit", $tables); - if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) { - $tables[$actionIndex] = "log_visit"; - $tables[$visitIndex] = "log_link_visit_action"; - } - - foreach ($tables as $i => $table) { - if (is_array($table)) { - // join condition provided - $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table']; - $sql .= " - LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias - . " ON " . $table['joinOn']; - continue; - } - - if (!in_array($table, $knownTables)) { - throw new Exception("Table '$table' can't be used for segmentation"); - } - - $tableSql = Common::prefixTable($table) . " AS $table"; - - if ($i == 0) { - // first table - $sql .= $tableSql; - } else { - if ($actionsAvailable && $table == "log_conversion") { - // have actions, need conversions => join on idlink_va - $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va " - . "AND log_conversion.idsite = log_link_visit_action.idsite"; - } else if ($actionsAvailable && $table == "log_visit") { - // have actions, need visits => join on idvisit - $join = "log_visit.idvisit = log_link_visit_action.idvisit"; - } else if ($visitsAvailable && $table == "log_link_visit_action") { - // have visits, need actions => we have to use a more complex join - // we don't hande this here, we just return joinWithSubSelect=true in this case - $joinWithSubSelect = true; - $join = "log_link_visit_action.idvisit = log_visit.idvisit"; - } else if ($conversionsAvailable && $table == "log_link_visit_action") { - // have conversions, need actions => join on idlink_va - $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va"; - } else if (($visitsAvailable && $table == "log_conversion") - || ($conversionsAvailable && $table == "log_visit") - ) { - // have visits, need conversion (or vice versa) => join on idvisit - // notice that joining conversions on visits has lower priority than joining it on actions - $join = "log_conversion.idvisit = log_visit.idvisit"; - - // if conversions are joined on visits, we need a complex join - if ($table == "log_conversion") { - $joinWithSubSelect = true; - } - } elseif ($conversionItemAvailable && $table === 'log_visit') { - $join = "log_conversion_item.idvisit = log_visit.idvisit"; - } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') { - $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit"; - } elseif ($conversionItemAvailable && $table === 'log_conversion') { - $join = "log_conversion_item.idvisit = log_conversion.idvisit"; - } else { - throw new Exception("Table '$table' can't be joined for segmentation"); - } - - // the join sql the default way - $sql .= " - LEFT JOIN $tableSql ON $join"; - } - - // remember which tables are available - $visitsAvailable = ($visitsAvailable || $table == "log_visit"); - $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action"); - $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion"); - $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item"); - } - - $return = array( - 'sql' => $sql, - 'joinWithSubSelect' => $joinWithSubSelect - ); - return $return; - - } - - /** - * Build select query the normal way - * @param string $select fieldlist to be selected - * @param string $from tablelist to select from - * @param string $where where clause - * @param string $orderBy order by clause - * @param string $groupBy group by clause - * @return string - */ - private function buildSelectQuery($select, $from, $where, $orderBy, $groupBy) - { - $sql = " - SELECT - $select - FROM - $from"; - - if ($where) { - $sql .= " - WHERE - $where"; - } - - if ($groupBy) { - $sql .= " - GROUP BY - $groupBy"; - } - - if ($orderBy) { - $sql .= " - ORDER BY - $orderBy"; - } - - return $sql; - } - - /** - * Build a select query where actions have to be joined on visits (or conversions) - * In this case, the query gets wrapped in another query so that grouping by visit is possible - * @param string $select - * @param string $from - * @param string $where - * @param string $orderBy - * @param string $groupBy - * @throws Exception - * @return string - */ - private function buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy) - { - $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)"; - preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches); - $neededFields = array_unique($matches[0]); - - if (count($neededFields) == 0) { - throw new Exception("No needed fields found in select expression. " - . "Please use a table prefix."); - } - - $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select); - $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy); - $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy); - - $from = "( - SELECT - " . implode(", - ", $neededFields) . " - FROM - $from - WHERE - $where - GROUP BY log_visit.idvisit - ) AS log_inner"; - - $where = false; - $query = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy); - return $query; - } - /** * Returns the segment string. * @@ -464,4 +250,5 @@ public function __toString() { return (string) $this->getString(); } + } \ No newline at end of file diff --git a/core/Segment/SegmentExpression.php b/core/Segment/SegmentExpression.php index e32bbe87232..4078321ce07 100644 --- a/core/Segment/SegmentExpression.php +++ b/core/Segment/SegmentExpression.php @@ -46,6 +46,11 @@ function __construct($string) $this->tree = $this->parseTree(); } + public function isEmpty() + { + return empty($this->string); + } + protected $joins = array(); protected $valuesBind = array(); protected $parsedTree = array();