First commit

This commit is contained in:
Theodotos Andreou 2018-01-14 13:10:16 +00:00
commit c6e2478c40
13918 changed files with 2303184 additions and 0 deletions

View file

@ -0,0 +1,49 @@
<?php
/**
 * Shared SQL helpers for the dedupe query-builder classes.
 *
 * Class CRM_Dedupe_BAO_QueryBuilder
 */
class CRM_Dedupe_BAO_QueryBuilder {

  /**
   * Build the WHERE fragment that restricts a contact-pair query.
   *
   * Without a contact list the fragment only keeps pairs where the first id
   * is lower than the second. With a contact list the first contact must be
   * in the list, and when both contacts are in the list only the ordered pair
   * is kept, so 1,2 and 2,1 are not both returned.
   *
   * @param object $rg
   *   Rule group; only its contactIds property is read here.
   * @param string $strID1
   *   SQL expression for the first contact id.
   * @param string $strID2
   *   SQL expression for the second contact id.
   *
   * @return string
   *   Parenthesised SQL condition.
   */
  public static function internalFilters($rg, $strID1 = 'contact1.id', $strID2 = 'contact2.id') {
    if (empty($rg->contactIds)) {
      return "($strID1 < $strID2)";
    }
    // The ids end up inside an IN (...) list, so force each one to an integer
    // rather than trusting the caller to have cleaned them.
    $cids = implode(',', array_map('intval', $rg->contactIds));
    return "($strID1 IN ($cids) AND ( $strID2 NOT IN ($cids) OR ($strID2 IN ($cids) AND $strID1 < $strID2) ))";
  }

  /**
   * If a contact list is specified then adjust the query to ensure one contact is in that list.
   *
   * Doing an OR join here will lead to a server-killing unindexed query. However, a union will
   * perform better.
   *
   * @param array $contactList
   *   Contact ids to restrict to; an empty list means no restriction.
   * @param string $query
   *   Query to extend. It must already end in a WHERE clause because the
   *   extra conditions are appended with AND.
   * @param string $strID1
   *   SQL expression for the first contact id.
   * @param string $strID2
   *   SQL expression for the second contact id.
   *
   * @return string
   *   The extended query (a UNION of two copies when a list is given).
   */
  protected static function filterQueryByContactList(array $contactList, $query, $strID1 = 'contact1.id', $strID2 = 'contact2.id') {
    if (empty($contactList)) {
      return $query . " AND ($strID1 < $strID2)";
    }
    // Same integer coercion as internalFilters(): the list is interpolated into SQL.
    $contactIDs = implode(',', array_map('intval', $contactList));
    return "$query AND $strID1 IN ($contactIDs) AND $strID1 > $strID2\n"
      . "UNION $query AND $strID1 > $strID2 AND $strID2 IN ($contactIDs) AND $strID1 NOT IN ($contactIDs)\n";
  }

}

View file

@ -0,0 +1,72 @@
<?php
/**
 * Query builder matching Individuals on first name, last name and street
 * address, with birth date, suffix and middle name as optional refinements.
 *
 * TODO: How to handle NULL values/records?
 * Class CRM_Dedupe_BAO_QueryBuilder_IndividualGeneral
 */
class CRM_Dedupe_BAO_QueryBuilder_IndividualGeneral extends CRM_Dedupe_BAO_QueryBuilder {

  /**
   * Build the query that finds existing Individuals matching $rg->params.
   *
   * @param object $rg
   *   Rule group; its params, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function record($rg) {
    $civicrm_contact = CRM_Utils_Array::value('civicrm_contact', $rg->params);
    $civicrm_address = CRM_Utils_Array::value('civicrm_address', $rg->params);

    // Since definitely have first and last name, escape them upfront.
    $first_name = CRM_Core_DAO::escapeString(CRM_Utils_Array::value('first_name', $civicrm_contact, ''));
    $last_name = CRM_Core_DAO::escapeString(CRM_Utils_Array::value('last_name', $civicrm_contact, ''));
    $street_address = CRM_Core_DAO::escapeString(CRM_Utils_Array::value('street_address', $civicrm_address, ''));

    $query = "\n"
      . "SELECT contact1.id id1, {$rg->threshold} as weight\n"
      . "FROM civicrm_contact AS contact1\n"
      . "JOIN civicrm_address AS address1 ON contact1.id=address1.contact_id\n"
      . "WHERE contact1.contact_type = 'Individual'\n"
      . "AND contact1.first_name = '$first_name'\n"
      . "AND contact1.last_name = '$last_name'\n"
      . "AND address1.street_address = '$street_address'\n";

    // The optional fields only narrow the match when a value was supplied;
    // stored NULLs are always accepted.
    if ($birth_date = CRM_Core_DAO::escapeString(CRM_Utils_Array::value('birth_date', $civicrm_contact, ''))) {
      $query .= " AND (contact1.birth_date IS NULL or contact1.birth_date = '$birth_date')\n";
    }

    // suffix_id is compared unquoted, so string escaping alone is not enough:
    // only a value that validates as an integer may reach the SQL.
    $suffix_id = filter_var(CRM_Utils_Array::value('suffix_id', $civicrm_contact, ''), FILTER_VALIDATE_INT);
    if ($suffix_id) {
      $query .= " AND (contact1.suffix_id IS NULL or contact1.suffix_id = $suffix_id)\n";
    }

    if ($middle_name = CRM_Core_DAO::escapeString(CRM_Utils_Array::value('middle_name', $civicrm_contact, ''))) {
      $query .= " AND (contact1.middle_name IS NULL or contact1.middle_name = '$middle_name')\n";
    }

    return array("civicrm_contact.{$rg->name}.{$rg->threshold}" => $query);
  }

  /**
   * Build the query that finds pairs of Individuals duplicating each other.
   *
   * Pairs must share first name, last name and a street address; suffix,
   * middle name and birth date must be equal unless either side is NULL.
   *
   * @param object $rg
   *   Rule group; its contactIds, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function internal($rg) {
    $query = "\n"
      . "SELECT contact1.id id1, contact2.id id2, {$rg->threshold} weight\n"
      . "FROM civicrm_contact AS contact1\n"
      . "JOIN civicrm_contact AS contact2 ON (\n"
      . "contact1.first_name = contact2.first_name AND\n"
      . "contact1.last_name = contact2.last_name AND\n"
      . "contact1.contact_type = contact2.contact_type)\n"
      . "JOIN civicrm_address AS address1 ON address1.contact_id = contact1.id\n"
      . "JOIN civicrm_address AS address2 ON (\n"
      . "address2.contact_id = contact2.id AND\n"
      . "address2.street_address = address1.street_address)\n"
      . "WHERE contact1.contact_type = 'Individual'\n"
      . "AND (contact1.suffix_id IS NULL OR contact2.suffix_id IS NULL OR contact1.suffix_id = contact2.suffix_id)\n"
      . "AND (contact1.middle_name IS NULL OR contact2.middle_name IS NULL OR contact1.middle_name = contact2.middle_name)\n"
      . "AND (contact1.birth_date IS NULL OR contact2.birth_date IS NULL OR contact1.birth_date = contact2.birth_date)\n"
      . "AND " . self::internalFilters($rg);

    return array("civicrm_contact.{$rg->name}.{$rg->threshold}" => $query);
  }

}

View file

@ -0,0 +1,73 @@
<?php
/**
 * Query builder matching Individuals on first name, last name and email.
 *
 * TODO: How to handle NULL values/records?
 * Class CRM_Dedupe_BAO_QueryBuilder_IndividualSupervised
 */
class CRM_Dedupe_BAO_QueryBuilder_IndividualSupervised extends CRM_Dedupe_BAO_QueryBuilder {

  /**
   * Build the query that finds existing Individuals matching $rg->params.
   *
   * First name, last name and email are passed to composeQuery() as
   * String-typed placeholders %1, %2 and %3.
   *
   * @param object $rg
   *   Rule group; its params, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function record($rg) {
    $contactValues = CRM_Utils_Array::value('civicrm_contact', $rg->params, array());
    $emailValues = CRM_Utils_Array::value('civicrm_email', $rg->params, array());

    $placeholders = array();
    $placeholders[1] = array(CRM_Utils_Array::value('first_name', $contactValues, ''), 'String');
    $placeholders[2] = array(CRM_Utils_Array::value('last_name', $contactValues, ''), 'String');
    $placeholders[3] = array(CRM_Utils_Array::value('email', $emailValues, ''), 'String');

    $template = "\n"
      . "SELECT contact.id as id1, {$rg->threshold} as weight\n"
      . "FROM civicrm_contact as contact\n"
      . "JOIN civicrm_email as email ON email.contact_id=contact.id\n"
      . "WHERE contact_type = 'Individual'\n"
      . "AND first_name = %1\n"
      . "AND last_name = %2\n"
      . "AND email = %3";

    $result = array();
    $result["civicrm_contact.{$rg->name}.{$rg->threshold}"] = CRM_Core_DAO::composeQuery($template, $placeholders, TRUE);
    return $result;
  }

  /**
   * Build the query that finds pairs of Individuals duplicating each other.
   *
   * The base query is handed to filterQueryByContactList(), which appends the
   * pair-ordering and optional contact-list conditions.
   *
   * @param object $rg
   *   Rule group; its contactIds, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function internal($rg) {
    $pairQuery = "\n"
      . "SELECT contact1.id as id1, contact2.id as id2, {$rg->threshold} as weight\n"
      . "FROM civicrm_contact as contact1\n"
      . "JOIN civicrm_email as email1 ON email1.contact_id=contact1.id\n"
      . "JOIN civicrm_contact as contact2 ON\n"
      . "contact1.first_name = contact2.first_name AND\n"
      . "contact1.last_name = contact2.last_name\n"
      . "JOIN civicrm_email as email2 ON\n"
      . "email2.contact_id=contact2.id AND\n"
      . "email1.email=email2.email\n"
      . "WHERE contact1.contact_type = 'Individual'";

    $filtered = self::filterQueryByContactList($rg->contactIds, $pairQuery);
    return array("civicrm_contact.{$rg->name}.{$rg->threshold}" => $filtered);
  }

}

View file

@ -0,0 +1,115 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
 * Query builder matching Individuals on email address only.
 *
 * Class CRM_Dedupe_BAO_QueryBuilder_IndividualUnsupervised
 */
class CRM_Dedupe_BAO_QueryBuilder_IndividualUnsupervised extends CRM_Dedupe_BAO_QueryBuilder {

  /**
   * Build the query that finds existing Individuals with the email in $rg->params.
   *
   * @param object $rg
   *   Rule group; its params, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function record($rg) {
    $civicrm_email = CRM_Utils_Array::value('civicrm_email', $rg->params, array());

    $params = array(
      1 => array(CRM_Utils_Array::value('email', $civicrm_email, ''), 'String'),
    );

    $sql = "\n"
      . "SELECT contact.id as id1, {$rg->threshold} as weight\n"
      . "FROM civicrm_contact as contact\n"
      . "JOIN civicrm_email as email ON email.contact_id=contact.id\n"
      . "WHERE contact_type = 'Individual'\n"
      . "AND email = %1";

    return array(
      "civicrm_contact.{$rg->name}.{$rg->threshold}" => CRM_Core_DAO::composeQuery($sql, $params, TRUE),
    );
  }

  /**
   * Build the query that finds pairs of Individuals sharing an email address.
   *
   * @param object $rg
   *   Rule group; its contactIds, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function internal($rg) {
    // contact2 is joined without its own ON clause; it is tied to email2 (and
    // through it to email1) by the conditions on the final join.
    $query = "\n"
      . "SELECT contact1.id as id1, contact2.id as id2, {$rg->threshold} as weight\n"
      . "FROM civicrm_contact as contact1\n"
      . "JOIN civicrm_email as email1 ON email1.contact_id=contact1.id\n"
      . "JOIN civicrm_contact as contact2\n"
      . "JOIN civicrm_email as email2 ON\n"
      . "email2.contact_id=contact2.id AND\n"
      . "email1.email=email2.email\n"
      . "WHERE contact1.contact_type = 'Individual'\n"
      . "AND " . self::internalFilters($rg);

    return array("civicrm_contact.{$rg->name}.{$rg->threshold}" => $query);
  }

  /**
   * An alternative version which might perform a lot better
   * than the above. Will need to do some testing
   *
   * Unlike internal(), this executes SQL itself: it fills a temporary table
   * "emails" with contact-id pairs sharing an email and returns a query that
   * reads from it.
   *
   * @param object $rg
   *   Rule group; its contactIds, name and threshold properties are read.
   *
   * @return array
   *   One query keyed by "civicrm_contact.{name}.{threshold}".
   */
  public static function internalOptimized($rg) {
    // A temporary table lives for the whole connection, so a second call would
    // fail on CREATE unless the previous copy is removed first.
    CRM_Core_DAO::executeQuery('DROP TEMPORARY TABLE IF EXISTS emails');

    $sql = "\n"
      . "CREATE TEMPORARY TABLE emails (\n"
      . "email varchar(255),\n"
      . "contact_id1 int,\n"
      . "contact_id2 int,\n"
      . "INDEX(contact_id1),\n"
      . "INDEX(contact_id2)\n"
      . ") ENGINE=InnoDB\n";
    CRM_Core_DAO::executeQuery($sql);

    $sql = "\n"
      . "INSERT INTO emails\n"
      . "SELECT email1.email as email, email1.contact_id as contact_id1, email2.contact_id as contact_id2\n"
      . "FROM civicrm_email as email1\n"
      . "JOIN civicrm_email as email2 USING (email)\n"
      . "WHERE email1.contact_id < email2.contact_id\n"
      . "AND " . self::internalFilters($rg, "email1.contact_id", "email2.contact_id");
    CRM_Core_DAO::executeQuery($sql);

    $query = "\n"
      . "SELECT contact_id1 as id1, contact_id2 as id2, {$rg->threshold} as weight\n"
      . "FROM emails\n"
      . "JOIN civicrm_contact as contact1 on contact1.id=contact_id1\n"
      . "JOIN civicrm_contact as contact2 on contact2.id=contact_id2\n"
      . "WHERE contact1.contact_type='Individual'\n"
      . "AND contact2.contact_type='Individual'\n"
      . "AND " . self::internalFilters($rg);

    return array("civicrm_contact.{$rg->name}.{$rg->threshold}" => $query);
  }

}

View file

@ -0,0 +1,243 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
*
* @package CRM
* @copyright CiviCRM LLC (c) 2004-2017
* $Id$
*
*/
/**
* The CiviCRM duplicate discovery engine is based on an
* algorithm designed by David Strauss <david@fourkitchens.com>.
*/
class CRM_Dedupe_BAO_Rule extends CRM_Dedupe_DAO_Rule {
/**
 * Contact ids used to narrow the generated SQL; when empty the query covers
 * the whole database.
 *
 * @var array
 */
public $contactIds = array();

/**
 * Values to dedupe against; when empty the query compares the whole contact
 * set against itself.
 *
 * @var array
 */
public $params = array();
/**
 * Return the SQL query for the given rule - either for finding matching
 * pairs of contacts, or for matching against the $params variable (if set).
 *
 * @return string|null
 *   SQL query performing the search, or NULL when $params is set but holds
 *   no entry for this rule's table and field.
 */
public function sql() {
  if ($this->params &&
    (!array_key_exists($this->rule_table, $this->params) ||
      !array_key_exists($this->rule_field, $this->params[$this->rule_table])
    )
  ) {
    // if params is present and doesn't have an entry for a field, don't construct the clause.
    return NULL;
  }

  // we need to initialise WHERE, ON and USING here, as some table types
  // extend them; $where is an array of required conditions, $on and
  // $using are arrays of required field matchings (for substring and
  // full matches, respectively)
  $where = array();
  $on = array("SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR(t2.{$this->rule_field}, 1, {$this->rule_length})");
  $using = array($this->rule_field);

  switch ($this->rule_table) {
    case 'civicrm_contact':
      $id = 'id';
      //we should restrict by contact type in the first step
      $sql = "SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = {$this->dedupe_rule_group_id};";
      $ct = CRM_Core_DAO::singleValueQuery($sql);
      $where[] = "t1.contact_type = '{$ct}'";
      if (!$this->params) {
        // Only pair queries have a second table alias to restrict.
        $where[] = "t2.contact_type = '{$ct}'";
      }
      break;

    case 'civicrm_address':
      $id = 'contact_id';
      $on[] = 't1.location_type_id = t2.location_type_id';
      $using[] = 'location_type_id';
      // $params is empty in pair mode and may lack location_type_id in
      // parametrised mode, so test with empty() instead of reading the keys
      // directly (which raised an undefined index notice).
      if (!empty($this->params['civicrm_address']['location_type_id'])) {
        $locTypeId = CRM_Utils_Type::escape($this->params['civicrm_address']['location_type_id'], 'Integer', FALSE);
        if ($locTypeId) {
          $where[] = "t1.location_type_id = $locTypeId";
        }
      }
      break;

    case 'civicrm_email':
    case 'civicrm_im':
    case 'civicrm_openid':
    case 'civicrm_phone':
      $id = 'contact_id';
      break;

    case 'civicrm_note':
      $id = 'entity_id';
      $where[] = "t1.entity_table = 'civicrm_contact'";
      if (!$this->params) {
        $where[] = "t2.entity_table = 'civicrm_contact'";
      }
      break;

    default:
      // custom data tables
      if (preg_match('/^civicrm_value_/', $this->rule_table) || preg_match('/^custom_value_/', $this->rule_table)) {
        $id = 'entity_id';
      }
      else {
        CRM_Core_Error::fatal("Unsupported rule_table for civicrm_dedupe_rule.id of {$this->id}");
      }
      break;
  }

  // build SELECT based on the field names containing contact ids
  // if there are params provided, only id1 is selected
  if ($this->params) {
    $select = "t1.$id id1, {$this->rule_weight} weight";
    $subSelect = 'id1, weight';
  }
  else {
    $select = "t1.$id id1, t2.$id id2, {$this->rule_weight} weight";
    $subSelect = 'id1, id2, weight';
  }

  // build FROM (and WHERE, if it's a parametrised search)
  // based on whether the rule is about substrings or not
  if ($this->params) {
    $from = "{$this->rule_table} t1";
    $str = 'NULL';
    if (isset($this->params[$this->rule_table][$this->rule_field])) {
      $str = CRM_Utils_Type::escape($this->params[$this->rule_table][$this->rule_field], 'String');
    }
    if ($this->rule_length) {
      $where[] = "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR('$str', 1, {$this->rule_length})";
      $where[] = "t1.{$this->rule_field} IS NOT NULL";
      $where[] = "t1.{$this->rule_field} <> ''";
    }
    else {
      $where[] = "t1.{$this->rule_field} = '$str'";
    }
  }
  else {
    if ($this->rule_length) {
      $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $on) . ")";
    }
    else {
      $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 USING (" . implode(', ', $using) . ")";
    }
  }

  // finish building WHERE, also limit the results if requested
  if (!$this->params) {
    $where[] = "t1.$id < t2.$id";
    $where[] = "t1.{$this->rule_field} IS NOT NULL";
    $where[] = "t1.{$this->rule_field} <> ''";
  }
  $query = "SELECT $select FROM $from WHERE " . implode(' AND ', $where);

  if ($this->contactIds) {
    $cids = array();
    foreach ($this->contactIds as $cid) {
      $cids[] = CRM_Utils_Type::escape($cid, 'Integer');
    }
    // One copy of the query restricts t1 to the listed contacts, the other
    // restricts t2; the two are combined with UNION. The right-hand side is
    // built from the unrestricted $query before the assignment happens.
    if (count($cids) == 1) {
      $query .= " AND (t1.$id = {$cids[0]}) UNION $query AND t2.$id = {$cids[0]}";
    }
    else {
      $query .= " AND t1.$id IN (" . implode(',', $cids) . ")
UNION $query AND t2.$id IN (" . implode(',', $cids) . ")";
    }
    // The `weight` is ambiguous in the context of the union; put the whole
    // thing in a subquery.
    $query = "SELECT $subSelect FROM ($query) subunion";
  }

  return $query;
}
/**
 * List the field names used by the rules of one rule group.
 *
 * The rule group is looked up by its "used" value and contact type.
 *
 * @param array $params
 *   Must contain the keys 'used' and 'contact_type'.
 *
 * @return array
 *   The rule_field of every rule attached to the rule group that was found.
 */
public static function dedupeRuleFields($params) {
  $group = new CRM_Dedupe_BAO_RuleGroup();
  $group->used = $params['used'];
  $group->contact_type = $params['contact_type'];
  $group->find(TRUE);

  $rule = new CRM_Dedupe_BAO_Rule();
  $rule->dedupe_rule_group_id = $group->id;
  $rule->find();

  $fieldNames = array();
  while ($rule->fetch()) {
    array_push($fieldNames, $rule->rule_field);
  }
  return $fieldNames;
}
/**
 * Check that no dedupe exception record exists for a pair of contacts.
 *
 * @param int $cid
 * @param int $oid
 *
 * @return bool|null
 *   NULL when either id is empty, FALSE when an exception row exists for the
 *   pair, TRUE otherwise.
 */
public static function validateContacts($cid, $oid) {
  if (!$cid || !$oid) {
    return NULL;
  }

  // The lookup always puts the lower id in contact_id1.
  $swap = $cid > $oid;
  $lookup = new CRM_Dedupe_DAO_Exception();
  $lookup->contact_id1 = $swap ? $oid : $cid;
  $lookup->contact_id2 = $swap ? $cid : $oid;

  if ($lookup->find(TRUE)) {
    return FALSE;
  }
  return TRUE;
}
}

View file

@ -0,0 +1,514 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
*
* @package CRM
* @copyright CiviCRM LLC (c) 2004-2017
* $Id$
*
*/
/**
* The CiviCRM duplicate discovery engine is based on an
* algorithm designed by David Strauss <david@fourkitchens.com>.
*/
class CRM_Dedupe_BAO_RuleGroup extends CRM_Dedupe_DAO_RuleGroup {
/**
 * Contact ids used to narrow the SQL queries; when empty the queries cover
 * the whole database.
 *
 * @var array
 */
public $contactIds = array();

/**
 * Values to dedupe against; when empty the queries compare the whole contact
 * set against itself.
 *
 * @var array
 */
public $params = array();

/**
 * Set to TRUE by tableQuery() when the rule group yields no queries.
 *
 * @var bool
 */
public $noRules = FALSE;

/**
 * Restrict the dedupe run to the given contacts.
 *
 * @param array $contactIds
 */
public function setContactIds($contactIds) {
  $this->contactIds = $contactIds;
}
/**
 * Return the tables and fields that dedupe rules may use, with their titles.
 *
 * The structure is built once per request for all three contact types and
 * kept in a function-static variable; the dupeQuery hook is invoked with it
 * on every call.
 *
 * @param string $requestedType
 *   The requested contact type.
 *
 * @return array
 *   a table-keyed array of field-keyed arrays holding supported fields' titles
 */
public static function &supportedFields($requestedType) {
  static $fields = NULL;

  if (!$fields) {
    // importableFields() reports some columns under a joined table or label
    // name; map those back to the column a rule has to compare.
    $whereAliases = array(
      'civicrm_country.name' => 'civicrm_address.country_id',
      'civicrm_county.name' => 'civicrm_address.county_id',
      'civicrm_state_province.name' => 'civicrm_address.state_province_id',
      'gender.label' => 'civicrm_contact.gender_id',
      'individual_prefix.label' => 'civicrm_contact.prefix_id',
      'individual_suffix.label' => 'civicrm_contact.suffix_id',
      'addressee.label' => 'civicrm_contact.addressee_id',
      'email_greeting.label' => 'civicrm_contact.email_greeting_id',
      'postal_greeting.label' => 'civicrm_contact.postal_greeting_id',
      'civicrm_phone.phone' => 'civicrm_phone.phone_numeric',
    );
    // Only fields living in these tables are offered.
    $allowedTables = array(
      'civicrm_address',
      'civicrm_contact',
      'civicrm_email',
      'civicrm_im',
      'civicrm_note',
      'civicrm_openid',
      'civicrm_phone',
    );

    foreach (array('Individual', 'Organization', 'Household') as $ctype) {
      // Core fields: table.field pairs and titles come from importableFields().
      foreach (CRM_Contact_BAO_Contact::importableFields($ctype) as $importable) {
        if (!isset($importable['where'])) {
          continue;
        }
        $column = $importable['where'];
        if (isset($whereAliases[$column])) {
          $column = $whereAliases[$column];
        }
        list($table, $field) = explode('.', $column);
        if (in_array($table, $allowedTables)) {
          $fields[$ctype][$table][$field] = $importable['title'];
        }
      }

      // Custom data fields: only the integer-keyed entries of the tree are groups.
      foreach (CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1) as $groupKey => $customGroup) {
        if (is_int($groupKey)) {
          foreach ($customGroup['fields'] as $customField) {
            $fields[$ctype][$customGroup['table_name']][$customField['column_name']] = $customField['label'];
          }
        }
      }
    }
  }

  CRM_Utils_Hook::dupeQuery(CRM_Core_DAO::$_nullObject, 'supportedFields', $fields);
  return $fields[$requestedType];
}
/**
 * Return the SQL statement that removes the temporary "dedupe" table.
 *
 * @return string
 */
public function tableDropQuery() {
  $dropStatement = 'DROP TEMPORARY TABLE IF EXISTS dedupe';
  return $dropStatement;
}
/**
 * Return a set of SQL queries whose cumulative weights will mark matched
 * records for RuleGroup::thresholdQuery() to retrieve.
 *
 * @return array
 *   SQL strings keyed by "table.field.weight" when built from the group's
 *   rules, or whatever the reserved group's query-builder class returns.
 *   Empty (and $this->noRules set to TRUE) when nothing was produced.
 */
public function tableQuery() {
  // Make sure we've got a fetched db record, not sure if this is enforced.
  // NOTE(review): "!" binds tighter than "==", so the first test compares
  // (!$this->name) with NULL and is true when name is NON-empty - possibly
  // not what was intended. The parentheses below only make the existing
  // behaviour explicit; confirm the intent before changing it.
  $needsFetch = ((!$this->name) == NULL) || ($this->is_reserved == NULL);
  if ($needsFetch) {
    $this->find(TRUE);
  }

  // Reserved Rule Groups can optionally get special treatment by
  // implementing an optimization class and returning a query array.
  $hasBuilder = $this->is_reserved &&
    CRM_Utils_File::isIncludable("CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php");

  if ($hasBuilder) {
    $method = empty($this->params) ? 'internal' : 'record';
    $queries = call_user_func(array("CRM_Dedupe_BAO_QueryBuilder_{$this->name}", $method), $this);
  }
  else {
    // All other rule groups have queries generated by the member dedupe
    // rules defined in the administrative interface. The rules are loaded
    // by descending weight so that their execution can be short circuited
    // in RuleGroup::fillTable().
    $rule = new CRM_Dedupe_BAO_Rule();
    $rule->dedupe_rule_group_id = $this->id;
    $rule->orderBy('rule_weight DESC');
    $rule->find();

    $queries = array();
    while ($rule->fetch()) {
      // Each rule builds its own SQL, honouring the params and contact ids
      // set on this rule group.
      $rule->contactIds = $this->contactIds;
      $rule->params = $this->params;
      $ruleSql = $rule->sql();
      // sql() may return nothing for a rule; such rules are left out.
      if ($ruleSql) {
        $queries["{$rule->rule_table}.{$rule->rule_field}.{$rule->rule_weight}"] = $ruleSql;
      }
    }
  }

  if ($queries) {
    return $queries;
  }
  // No queries at all: remember that and hand back an empty set.
  $this->noRules = TRUE;
  return array();
}
/**
 * Create the temporary "dedupe" table and fill it by running the queries
 * returned by tableQuery().
 *
 * When params are set (and the group has rules) the table holds
 * (id1, weight); otherwise it holds (id1, id2, weight). Every query is run
 * as INSERT ... ON DUPLICATE KEY UPDATE, so the weight of a row that is
 * matched by several queries is summed.
 *
 * isQuerySetInclusive() decides, for the queries still pending, whether
 * they are all run (stopping at the first one that affects no rows), only
 * the next one is run, or processing stops.
 */
public function fillTable() {
// get the list of queries handy
$tableQueries = $this->tableQuery();
if ($this->params && !$this->noRules) {
$tempTableQuery = "CREATE TEMPORARY TABLE dedupe (id1 int, weight int, UNIQUE UI_id1 (id1)) ENGINE=InnoDB";
$insertClause = "INSERT INTO dedupe (id1, weight)";
$groupByClause = "GROUP BY id1, weight";
// "column" is a placeholder, replaced below by the column found after "t1." in the query
$dupeCopyJoin = " JOIN dedupe_copy ON dedupe_copy.id1 = t1.column WHERE ";
}
else {
$tempTableQuery = "CREATE TEMPORARY TABLE dedupe (id1 int, id2 int, weight int, UNIQUE UI_id1_id2 (id1, id2)) ENGINE=InnoDB";
$insertClause = "INSERT INTO dedupe (id1, id2, weight)";
$groupByClause = "GROUP BY id1, id2, weight";
// "column" is a placeholder, replaced below by the column found after "t1." in the query
$dupeCopyJoin = " JOIN dedupe_copy ON dedupe_copy.id1 = t1.column AND dedupe_copy.id2 = t2.column WHERE ";
}
// captures the column name that follows the first "t1" + one character in a query
$patternColumn = '/t1.(\w+)/';
// weights of the queries run one at a time (exclusive mode) that affected at least one row
$exclWeightSum = array();
// create temp table
$dao = new CRM_Core_DAO();
$dao->query($tempTableQuery);
// let hook implementations alter the query set before it is executed
CRM_Utils_Hook::dupeQuery($this, 'table', $tableQueries);
while (!empty($tableQueries)) {
list($isInclusive, $isDie) = self::isQuerySetInclusive($tableQueries, $this->threshold, $exclWeightSum);
if ($isInclusive) {
// order queries by table count
self::orderByTableCount($tableQueries);
$weightSum = array_sum($exclWeightSum);
$searchWithinDupes = !empty($exclWeightSum) ? 1 : 0;
while (!empty($tableQueries)) {
// extract the next query ( and weight ) to be executed
$fieldWeight = array_keys($tableQueries);
$fieldWeight = $fieldWeight[0];
$query = array_shift($tableQueries);
if ($searchWithinDupes) {
// get prepared to search within already found dupes if $searchWithinDupes flag is set
$dao->query("DROP TEMPORARY TABLE IF EXISTS dedupe_copy");
$dao->query("CREATE TEMPORARY TABLE dedupe_copy SELECT * FROM dedupe WHERE weight >= {$weightSum}");
$dao->free();
// swap every ' WHERE ' in the query for the join against dedupe_copy
preg_match($patternColumn, $query, $matches);
$query = str_replace(' WHERE ', str_replace('column', $matches[1], $dupeCopyJoin), $query);
// CRM-19612: If there's a union, there will be two WHEREs, and you
// can't use the temp table twice.
if (preg_match('/dedupe_copy[\S\s]*(union)[\S\s]*dedupe_copy/i', $query, $matches, PREG_OFFSET_CAPTURE)) {
// Make a second temp table:
$dao->query("DROP TEMPORARY TABLE IF EXISTS dedupe_copy_2");
$dao->query("CREATE TEMPORARY TABLE dedupe_copy_2 SELECT * FROM dedupe WHERE weight >= {$weightSum}");
$dao->free();
// After the union, use that new temp table:
$part1 = substr($query, 0, $matches[1][1]);
$query = $part1 . str_replace('dedupe_copy', 'dedupe_copy_2', substr($query, $matches[1][1]));
}
}
// every query after the first one of this run searches within the dupes found so far
$searchWithinDupes = 1;
// construct and execute the intermediate query
$query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
$dao->query($query);
// FIXME: we need to be more accurate with affected rows, especially for insert vs duplicate insert.
// And that will help optimize further.
$affectedRows = $dao->affectedRows();
$dao->free();
// In an inclusive situation, failure of any query means no further processing -
if ($affectedRows == 0) {
// reset to make sure no further execution is done.
$tableQueries = array();
break;
}
// the weight is the part of the query key after its last dot
$weightSum = substr($fieldWeight, strrpos($fieldWeight, '.') + 1) + $weightSum;
}
// The branch below handles an exclusive situation -
}
elseif (!$isDie) {
// since queries are already sorted by weights, we can continue as is
$fieldWeight = array_keys($tableQueries);
$fieldWeight = $fieldWeight[0];
$query = array_shift($tableQueries);
$query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
$dao->query($query);
// only a query that affected rows contributes its weight to later decisions
if ($dao->affectedRows() >= 1) {
$exclWeightSum[] = substr($fieldWeight, strrpos($fieldWeight, '.') + 1);
}
$dao->free();
}
else {
// it's a die situation
break;
}
}
}
/**
 * Classify a query set by how its weights can reach the threshold.
 *
 * The weight of a query is the part of its key after the last dot. The
 * function assumes that the query set is already ordered by weight in
 * descending order. Starting at each weight in turn, it counts the runs of
 * consecutive weights whose sum reaches the threshold.
 *
 * @param array $tableQueries
 *   Queries keyed by "table.field.weight".
 * @param int $threshold
 * @param array $exclWeightSum
 *   Weights already collected; when given they are merged in and the list is
 *   re-sorted in descending order.
 *
 * @return array
 *   array(isInclusive, isDie). With a single weight this is
 *   array(FALSE, weight < threshold); otherwise isInclusive is TRUE when
 *   exactly one run reaches the threshold and isDie is TRUE when none does.
 */
public static function isQuerySetInclusive($tableQueries, $threshold, $exclWeightSum = array()) {
  $weights = array();
  foreach (array_keys($tableQueries) as $queryKey) {
    $weights[] = substr($queryKey, strrpos($queryKey, '.') + 1);
  }

  if (!empty($exclWeightSum)) {
    $weights = array_merge($weights, $exclWeightSum);
    rsort($weights);
  }

  $total = count($weights);
  if ($total == 1) {
    return array(FALSE, $weights[0] < $threshold);
  }

  $reaching = 0;
  for ($start = 0; $start < $total; $start++) {
    for ($length = 1; $start + $length <= $total; $length++) {
      if (array_sum(array_slice($weights, $start, $length)) < $threshold) {
        continue;
      }
      $reaching++;
      // A weight that is enough on its own is counted once and not extended.
      if ($length == 1) {
        break;
      }
    }
  }

  return array($reaching == 1, $reaching <= 0);
}
/**
 * Reorder the queries by the row count of their table, smallest first.
 *
 * The table is the part of each query key before the first dot. Row counts
 * are fetched with SELECT COUNT(*) and remembered in a function-static
 * cache, so each table is counted once per request.
 *
 * @param array $tableQueries
 *   Queries keyed by "table.field.weight"; reordered in place.
 */
public static function orderByTableCount(&$tableQueries) {
  static $tableCount = array();

  $rowsByKey = array();
  foreach (array_keys($tableQueries) as $queryKey) {
    $keyParts = explode(".", $queryKey);
    $tableName = $keyParts[0];
    if (!array_key_exists($tableName, $tableCount)) {
      $tableCount[$tableName] = CRM_Core_DAO::singleValueQuery("SELECT COUNT(*) FROM {$tableName}");
    }
    $rowsByKey[$queryKey] = $tableCount[$tableName];
  }

  // asort() keeps the keys, so the sorted key order can be replayed onto the queries.
  asort($rowsByKey);
  $ordered = array();
  foreach (array_keys($rowsByKey) as $queryKey) {
    $ordered[$queryKey] = $tableQueries[$queryKey];
  }
  $tableQueries = $ordered;
}
/**
 * Return the SQL query for getting only the interesting results out of the dedupe table.
 *
 * With params set (and rules present) the query selects single contact ids;
 * otherwise it selects id pairs, lower id first, and leaves out pairs that
 * have a row in civicrm_dedupe_exception.
 *
 * @param bool $checkPermission
 *   Whether permissions should be considered. The default is to always check
 *   them, but public pages for example might not want permissions checked
 *   for anonymous users. Refer CRM-6211. We might be breaking Multi-Site
 *   dedupe for public pages.
 *
 * @return string
 */
public function thresholdQuery($checkPermission = TRUE) {
  $singleContactMode = $this->params && !$this->noRules;

  // CRM-6603: anonymous dupechecks side-step ACLs, so without a permission
  // check only deleted contacts are filtered out.
  $this->_aclFrom = '';
  $this->_aclWhere = $singleContactMode
    ? ' AND is_deleted = 0 '
    : ' AND c1.is_deleted = 0 AND c2.is_deleted = 0';

  if ($checkPermission) {
    $aclTables = $singleContactMode ? 'civicrm_contact' : array('c1', 'c2');
    list($aclFrom, $aclWhere) = CRM_Contact_BAO_Contact_Permission::cacheClause($aclTables);
    $this->_aclFrom = $aclFrom;
    $this->_aclWhere = $aclWhere ? "AND {$aclWhere}" : '';
  }

  if ($singleContactMode) {
    $query = "SELECT dedupe.id1 as id\n"
      . "FROM dedupe JOIN civicrm_contact ON dedupe.id1 = civicrm_contact.id {$this->_aclFrom}\n"
      . "WHERE contact_type = '{$this->contact_type}' {$this->_aclWhere}\n"
      . "AND weight >= {$this->threshold}";
  }
  else {
    $query = "SELECT IF(dedupe.id1 < dedupe.id2, dedupe.id1, dedupe.id2) as id1,\n"
      . "IF(dedupe.id1 < dedupe.id2, dedupe.id2, dedupe.id1) as id2, dedupe.weight\n"
      . "FROM dedupe JOIN civicrm_contact c1 ON dedupe.id1 = c1.id\n"
      . "JOIN civicrm_contact c2 ON dedupe.id2 = c2.id {$this->_aclFrom}\n"
      . "LEFT JOIN civicrm_dedupe_exception exc ON dedupe.id1 = exc.contact_id1 AND dedupe.id2 = exc.contact_id2\n"
      . "WHERE c1.contact_type = '{$this->contact_type}' AND\n"
      . "c2.contact_type = '{$this->contact_type}' {$this->_aclWhere}\n"
      . "AND weight >= {$this->threshold} AND exc.contact_id1 IS NULL";
  }

  CRM_Utils_Hook::dupeQuery($this, 'threshold', $query);
  return $query;
}
/**
 * Find the rule fields, their weights and the threshold of a rule group.
 *
 * The rule group is looked up by contact type plus either its id (when
 * given) or its "used" value.
 *
 * @param array $params
 *   Must contain 'contact_type' and either 'id' or 'used'.
 *
 * @return array
 *   (rule field => weight) array and threshold associated to rule group
 */
public static function dedupeRuleFieldsWeight($params) {
  $group = new CRM_Dedupe_BAO_RuleGroup();
  $group->contact_type = $params['contact_type'];
  if (empty($params['id'])) {
    $group->used = $params['used'];
  }
  else {
    // accept an ID if provided
    $group->id = $params['id'];
  }
  $group->find(TRUE);

  $rule = new CRM_Dedupe_BAO_Rule();
  $rule->dedupe_rule_group_id = $group->id;
  $rule->find();

  $weightByField = array();
  while ($rule->fetch()) {
    $weightByField[$rule->rule_field] = $rule->rule_weight;
  }

  return array($weightByField, $group->threshold);
}
/**
 * Get all of the combinations of fields that would work with a rule.
 *
 * Fields are taken in their given order. A combination is recorded as soon
 * as its summed weight reaches the threshold; until then it is extended
 * with the fields that come later in the list.
 *
 * @param array $rgFields
 *   Field name => weight.
 * @param int $threshold
 *   Weight still needed to reach the rule group's threshold.
 * @param array $combos
 *   Receives the combinations, each a list of field names.
 * @param array $running
 *   Field names already chosen on the current path.
 */
public static function combos($rgFields, $threshold, &$combos, $running = array()) {
  $position = 0;
  foreach ($rgFields as $fieldName => $fieldWeight) {
    $position++;

    $path = $running;
    $path[] = $fieldName;

    $stillNeeded = $threshold - $fieldWeight;
    if ($stillNeeded <= 0) {
      $combos[] = $path;
      continue;
    }

    // Only fields after the current one may extend the path.
    $laterFields = array_slice($rgFields, $position, NULL, TRUE);
    self::combos($laterFields, $stillNeeded, $combos, $path);
  }
}
/**
 * Get an array of rule group id to rule group name for all the groups of
 * a contact type. If no contact type is specified, all rule groups are
 * returned.
 *
 * The name shown is the group's title, falling back to its name and then to
 * its contact type, followed by " - " and the group's "used" value.
 *
 * @param string $contactType
 *   Individual, Household or Organization.
 *
 * @return array
 *   id => "nice name" of rule group
 */
public static function getByType($contactType = NULL) {
  $ruleGroup = new CRM_Dedupe_DAO_RuleGroup();
  if ($contactType) {
    $ruleGroup->contact_type = $contactType;
  }
  $ruleGroup->find();

  $names = array();
  while ($ruleGroup->fetch()) {
    if (!empty($ruleGroup->title)) {
      $label = $ruleGroup->title;
    }
    elseif (!empty($ruleGroup->name)) {
      $label = $ruleGroup->name;
    }
    else {
      $label = $ruleGroup->contact_type;
    }
    $names[$ruleGroup->id] = "$label - {$ruleGroup->used}";
  }
  return $names;
}
/**
 * Get the contact type of a rule group, cached per request.
 *
 * The value is read through CRM_Core_DAO::getFieldValue() the first time a
 * rule group id is seen and kept in \Civi::$statics afterwards.
 *
 * @param int $rule_group_id
 *
 * @return string
 */
public static function getContactTypeForRuleGroup($rule_group_id) {
  $class = __CLASS__;
  // isset() on the nested key is also FALSE when the class entry is missing.
  if (!isset(\Civi::$statics[$class]['rule_groups'])) {
    \Civi::$statics[$class]['rule_groups'] = array();
  }
  if (empty(\Civi::$statics[$class]['rule_groups'][$rule_group_id])) {
    $contactType = CRM_Core_DAO::getFieldValue(
      'CRM_Dedupe_DAO_RuleGroup',
      $rule_group_id,
      'contact_type'
    );
    \Civi::$statics[$class]['rule_groups'][$rule_group_id]['contact_type'] = $contactType;
  }
  return \Civi::$statics[$class]['rule_groups'][$rule_group_id]['contact_type'];
}
}

View file

@ -0,0 +1,207 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
* @package CRM
* @copyright CiviCRM LLC (c) 2004-2017
*
* Generated from xml/schema/CRM/Dedupe/Exception.xml
* DO NOT EDIT. Generated by CRM_Core_CodeGen
* (GenCodeChecksum:5e9f138ebec5aa2fcfd30120dffacdf5)
*/
require_once 'CRM/Core/DAO.php';
require_once 'CRM/Utils/Type.php';
/**
* Data access object for the civicrm_dedupe_exception table.
*
* Each row holds a pair of contact ids (contact_id1, contact_id2).
*
* This class is generated from xml/schema/CRM/Dedupe/Exception.xml; change
* the schema and regenerate rather than editing the code by hand.
*/
class CRM_Dedupe_DAO_Exception extends CRM_Core_DAO {
/**
* Name of the database table backing this DAO.
*
* @var string
*/
static $_tableName = 'civicrm_dedupe_exception';
/**
* Should CiviCRM log any modifications to this table in the civicrm_log table.
*
* @var boolean
*/
static $_log = false;
/**
* Unique dedupe exception id
*
* @var int unsigned
*/
public $id;
/**
* First contact of the pair (FK to Contact ID)
*
* @var int unsigned
*/
public $contact_id1;
/**
* Second contact of the pair (FK to Contact ID)
*
* @var int unsigned
*/
public $contact_id2;
/**
* Class constructor.
*
* Sets the table name on the instance before delegating to the parent
* constructor.
*/
function __construct() {
$this->__table = 'civicrm_dedupe_exception';
parent::__construct();
}
/**
* Returns foreign keys and entity references.
*
* The list is built on first use and cached in Civi::$statics: both contact
* id columns reference civicrm_contact.id.
*
* @return array
* [CRM_Core_Reference_Interface]
*/
static function getReferenceColumns() {
if (!isset(Civi::$statics[__CLASS__]['links'])) {
Civi::$statics[__CLASS__]['links'] = static ::createReferenceColumns(__CLASS__);
Civi::$statics[__CLASS__]['links'][] = new CRM_Core_Reference_Basic(self::getTableName() , 'contact_id1', 'civicrm_contact', 'id');
Civi::$statics[__CLASS__]['links'][] = new CRM_Core_Reference_Basic(self::getTableName() , 'contact_id2', 'civicrm_contact', 'id');
// NOTE(review): presumably gives registered callbacks a chance to alter the links - confirm in CRM_Core_DAO_AllCoreTables::invoke().
CRM_Core_DAO_AllCoreTables::invoke(__CLASS__, 'links_callback', Civi::$statics[__CLASS__]['links']);
}
return Civi::$statics[__CLASS__]['links'];
}
/**
* Returns the metadata of every field of this table, keyed by field name.
*
* The array is built on first use, cached in Civi::$statics and returned by
* reference.
*
* @return array
*/
static function &fields() {
if (!isset(Civi::$statics[__CLASS__]['fields'])) {
Civi::$statics[__CLASS__]['fields'] = array(
'id' => array(
'name' => 'id',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Dedupe Exception ID') ,
'description' => 'Unique dedupe exception id',
'required' => true,
'table_name' => 'civicrm_dedupe_exception',
'entity' => 'Exception',
'bao' => 'CRM_Dedupe_DAO_Exception',
'localizable' => 0,
) ,
'contact_id1' => array(
'name' => 'contact_id1',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('First Dupe Contact ID') ,
'description' => 'FK to Contact ID',
'table_name' => 'civicrm_dedupe_exception',
'entity' => 'Exception',
'bao' => 'CRM_Dedupe_DAO_Exception',
'localizable' => 0,
'FKClassName' => 'CRM_Contact_DAO_Contact',
) ,
'contact_id2' => array(
'name' => 'contact_id2',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Second Dupe Contact ID') ,
'description' => 'FK to Contact ID',
'table_name' => 'civicrm_dedupe_exception',
'entity' => 'Exception',
'bao' => 'CRM_Dedupe_DAO_Exception',
'localizable' => 0,
'FKClassName' => 'CRM_Contact_DAO_Contact',
) ,
);
CRM_Core_DAO_AllCoreTables::invoke(__CLASS__, 'fields_callback', Civi::$statics[__CLASS__]['fields']);
}
return Civi::$statics[__CLASS__]['fields'];
}
/**
* Return a mapping from field-name to the corresponding key (as used in fields()).
*
* Computed once by flipping the 'name' entries collected from fields(), then
* cached in Civi::$statics.
*
* @return array
* Array(string $name => string $uniqueName).
*/
static function &fieldKeys() {
if (!isset(Civi::$statics[__CLASS__]['fieldKeys'])) {
Civi::$statics[__CLASS__]['fieldKeys'] = array_flip(CRM_Utils_Array::collect('name', self::fields()));
}
return Civi::$statics[__CLASS__]['fieldKeys'];
}
/**
* Returns the name of this table.
*
* @return string
*/
static function getTableName() {
return self::$_tableName;
}
/**
* Returns whether modifications to this table should be logged (the static
* $_log flag).
*
* @return boolean
*/
function getLog() {
return self::$_log;
}
/**
* Returns the list of fields that can be imported.
*
* Delegates to CRM_Core_DAO_AllCoreTables::getImports().
*
* @param bool $prefix
* Passed through unchanged to getImports().
*
* @return array
*/
static function &import($prefix = false) {
$r = CRM_Core_DAO_AllCoreTables::getImports(__CLASS__, 'dedupe_exception', $prefix, array());
return $r;
}
/**
* Returns the list of fields that can be exported.
*
* Delegates to CRM_Core_DAO_AllCoreTables::getExports().
*
* @param bool $prefix
* Passed through unchanged to getExports().
*
* @return array
*/
static function &export($prefix = false) {
$r = CRM_Core_DAO_AllCoreTables::getExports(__CLASS__, 'dedupe_exception', $prefix, array());
return $r;
}
/**
* Returns the list of indices.
*
* The table has one unique index over (contact_id1, contact_id2).
*
* @param bool $localize
* When TRUE (and there are indices) the list is passed through
* CRM_Core_DAO_AllCoreTables::multilingualize() before being returned.
*
* @return array
*/
public static function indices($localize = TRUE) {
$indices = array(
'UI_contact_id1_contact_id2' => array(
'name' => 'UI_contact_id1_contact_id2',
'field' => array(
0 => 'contact_id1',
1 => 'contact_id2',
) ,
'localizable' => false,
'unique' => true,
'sig' => 'civicrm_dedupe_exception::1::contact_id1::contact_id2',
) ,
);
return ($localize && !empty($indices)) ? CRM_Core_DAO_AllCoreTables::multilingualize(__CLASS__, $indices) : $indices;
}
}

View file

@ -0,0 +1,256 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
* @package CRM
* @copyright CiviCRM LLC (c) 2004-2017
*
* Generated from xml/schema/CRM/Dedupe/Rule.xml
* DO NOT EDIT. Generated by CRM_Core_CodeGen
* (GenCodeChecksum:d4069481ff6f6925ed299206f67ab01e)
*/
require_once 'CRM/Core/DAO.php';
require_once 'CRM/Utils/Type.php';
/**
* Data access object for the civicrm_dedupe_rule table.
*
* Each row is one rule of a dedupe rule group: a table/field pair with an
* optional match length and a weight.
*
* This class is generated from xml/schema/CRM/Dedupe/Rule.xml; change the
* schema and regenerate rather than editing the code by hand.
*/
class CRM_Dedupe_DAO_Rule extends CRM_Core_DAO {
/**
* Name of the database table backing this DAO.
*
* @var string
*/
static $_tableName = 'civicrm_dedupe_rule';
/**
* Should CiviCRM log any modifications to this table in the civicrm_log table.
*
* @var boolean
*/
static $_log = false;
/**
* Unique dedupe rule id
*
* @var int unsigned
*/
public $id;
/**
* The id of the rule group this rule belongs to
*
* @var int unsigned
*/
public $dedupe_rule_group_id;
/**
* The name of the table this rule is about
*
* @var string
*/
public $rule_table;
/**
* The name of the field of the table referenced in rule_table
*
* @var string
*/
public $rule_field;
/**
* The length of the matching substring
*
* @var int unsigned
*/
public $rule_length;
/**
* The weight of the rule
*
* @var int
*/
public $rule_weight;
/**
* Class constructor.
*
* Sets the table name on the instance before delegating to the parent
* constructor.
*/
function __construct() {
$this->__table = 'civicrm_dedupe_rule';
parent::__construct();
}
/**
* Returns foreign keys and entity references.
*
* The list is built on first use and cached in Civi::$statics:
* dedupe_rule_group_id references civicrm_dedupe_rule_group.id.
*
* @return array
* [CRM_Core_Reference_Interface]
*/
static function getReferenceColumns() {
if (!isset(Civi::$statics[__CLASS__]['links'])) {
Civi::$statics[__CLASS__]['links'] = static ::createReferenceColumns(__CLASS__);
Civi::$statics[__CLASS__]['links'][] = new CRM_Core_Reference_Basic(self::getTableName() , 'dedupe_rule_group_id', 'civicrm_dedupe_rule_group', 'id');
// NOTE(review): presumably gives registered callbacks a chance to alter the links - confirm in CRM_Core_DAO_AllCoreTables::invoke().
CRM_Core_DAO_AllCoreTables::invoke(__CLASS__, 'links_callback', Civi::$statics[__CLASS__]['links']);
}
return Civi::$statics[__CLASS__]['links'];
}
/**
* Returns the metadata of every field of this table, keyed by field name.
*
* The array is built on first use, cached in Civi::$statics and returned by
* reference.
*
* @return array
*/
static function &fields() {
if (!isset(Civi::$statics[__CLASS__]['fields'])) {
Civi::$statics[__CLASS__]['fields'] = array(
'id' => array(
'name' => 'id',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Dedupe Rule ID') ,
'description' => 'Unique dedupe rule id',
'required' => true,
'table_name' => 'civicrm_dedupe_rule',
'entity' => 'Rule',
'bao' => 'CRM_Dedupe_BAO_Rule',
'localizable' => 0,
) ,
'dedupe_rule_group_id' => array(
'name' => 'dedupe_rule_group_id',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Dedupe Rule Group') ,
'description' => 'The id of the rule group this rule belongs to',
'required' => true,
'table_name' => 'civicrm_dedupe_rule',
'entity' => 'Rule',
'bao' => 'CRM_Dedupe_BAO_Rule',
'localizable' => 0,
'FKClassName' => 'CRM_Dedupe_DAO_RuleGroup',
) ,
'rule_table' => array(
'name' => 'rule_table',
'type' => CRM_Utils_Type::T_STRING,
'title' => ts('Rule Table') ,
'description' => 'The name of the table this rule is about',
'required' => true,
'maxlength' => 64,
'size' => CRM_Utils_Type::BIG,
'table_name' => 'civicrm_dedupe_rule',
'entity' => 'Rule',
'bao' => 'CRM_Dedupe_BAO_Rule',
'localizable' => 0,
) ,
'rule_field' => array(
'name' => 'rule_field',
'type' => CRM_Utils_Type::T_STRING,
'title' => ts('Rule Field') ,
'description' => 'The name of the field of the table referenced in rule_table',
'required' => true,
'maxlength' => 64,
'size' => CRM_Utils_Type::BIG,
'table_name' => 'civicrm_dedupe_rule',
'entity' => 'Rule',
'bao' => 'CRM_Dedupe_BAO_Rule',
'localizable' => 0,
) ,
'rule_length' => array(
'name' => 'rule_length',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Rule Length') ,
'description' => 'The length of the matching substring',
'table_name' => 'civicrm_dedupe_rule',
'entity' => 'Rule',
'bao' => 'CRM_Dedupe_BAO_Rule',
'localizable' => 0,
'html' => array(
'type' => 'Text',
) ,
) ,
'rule_weight' => array(
'name' => 'rule_weight',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Order') ,
'description' => 'The weight of the rule',
'required' => true,
'table_name' => 'civicrm_dedupe_rule',
'entity' => 'Rule',
'bao' => 'CRM_Dedupe_BAO_Rule',
'localizable' => 0,
'html' => array(
'type' => 'Text',
) ,
) ,
);
CRM_Core_DAO_AllCoreTables::invoke(__CLASS__, 'fields_callback', Civi::$statics[__CLASS__]['fields']);
}
return Civi::$statics[__CLASS__]['fields'];
}
/**
* Return a mapping from field-name to the corresponding key (as used in fields()).
*
* Computed once by flipping the 'name' entries collected from fields(), then
* cached in Civi::$statics.
*
* @return array
* Array(string $name => string $uniqueName).
*/
static function &fieldKeys() {
if (!isset(Civi::$statics[__CLASS__]['fieldKeys'])) {
Civi::$statics[__CLASS__]['fieldKeys'] = array_flip(CRM_Utils_Array::collect('name', self::fields()));
}
return Civi::$statics[__CLASS__]['fieldKeys'];
}
/**
* Returns the name of this table.
*
* @return string
*/
static function getTableName() {
return self::$_tableName;
}
/**
* Returns whether modifications to this table should be logged (the static
* $_log flag).
*
* @return boolean
*/
function getLog() {
return self::$_log;
}
/**
* Returns the list of fields that can be imported.
*
* Delegates to CRM_Core_DAO_AllCoreTables::getImports().
*
* @param bool $prefix
* Passed through unchanged to getImports().
*
* @return array
*/
static function &import($prefix = false) {
$r = CRM_Core_DAO_AllCoreTables::getImports(__CLASS__, 'dedupe_rule', $prefix, array());
return $r;
}
/**
* Returns the list of fields that can be exported.
*
* Delegates to CRM_Core_DAO_AllCoreTables::getExports().
*
* @param bool $prefix
* Passed through unchanged to getExports().
*
* @return array
*/
static function &export($prefix = false) {
$r = CRM_Core_DAO_AllCoreTables::getExports(__CLASS__, 'dedupe_rule', $prefix, array());
return $r;
}
/**
* Returns the list of indices.
*
* No indices are declared here, so the result is always an empty array and
* multilingualize() is never reached.
*
* @param bool $localize
* Only relevant when the index list is non-empty.
*
* @return array
*/
public static function indices($localize = TRUE) {
$indices = array();
return ($localize && !empty($indices)) ? CRM_Core_DAO_AllCoreTables::multilingualize(__CLASS__, $indices) : $indices;
}
}

View file

@ -0,0 +1,277 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
* @package CRM
* @copyright CiviCRM LLC (c) 2004-2017
*
* Generated from xml/schema/CRM/Dedupe/RuleGroup.xml
* DO NOT EDIT. Generated by CRM_Core_CodeGen
* (GenCodeChecksum:0f103fb52cf7d96d601d83e8417f77ad)
*/
require_once 'CRM/Core/DAO.php';
require_once 'CRM/Utils/Type.php';
/**
* Data access object for the civicrm_dedupe_rule_group table.
*
* A rule group carries the contact type it applies to, the weight threshold
* its rules have to reach, its usage, a name/title and a reserved flag.
*
* This class is generated from xml/schema/CRM/Dedupe/RuleGroup.xml; change
* the schema and regenerate rather than editing the code by hand.
*/
class CRM_Dedupe_DAO_RuleGroup extends CRM_Core_DAO {
/**
* Name of the database table backing this DAO.
*
* @var string
*/
static $_tableName = 'civicrm_dedupe_rule_group';
/**
* Should CiviCRM log any modifications to this table in the civicrm_log table.
*
* @var boolean
*/
static $_log = false;
/**
* Unique dedupe rule group id
*
* @var int unsigned
*/
public $id;
/**
* The type of contacts this group applies to
*
* @var string
*/
public $contact_type;
/**
* The weight threshold the sum of the rule weights has to cross to consider two contacts the same
*
* @var int
*/
public $threshold;
/**
* Whether the rule should be used for cases where usage is Unsupervised, Supervised OR General (programmatically)
*
* @var string
*/
public $used;
/**
* Name of the rule group
*
* @var string
*/
public $name;
/**
* Label of the rule group
*
* @var string
*/
public $title;
/**
* Is this a reserved rule - a rule group that has been optimized and cannot be changed by the admin
*
* @var boolean
*/
public $is_reserved;
/**
* Class constructor.
*
* Sets the table name on the instance before delegating to the parent
* constructor.
*/
function __construct() {
$this->__table = 'civicrm_dedupe_rule_group';
parent::__construct();
}
/**
* Returns the metadata of every field of this table, keyed by field name.
*
* The array is built on first use, cached in Civi::$statics and returned by
* reference.
*
* @return array
*/
static function &fields() {
if (!isset(Civi::$statics[__CLASS__]['fields'])) {
Civi::$statics[__CLASS__]['fields'] = array(
'id' => array(
'name' => 'id',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Rule Group ID') ,
'description' => 'Unique dedupe rule group id',
'required' => true,
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
) ,
'contact_type' => array(
'name' => 'contact_type',
'type' => CRM_Utils_Type::T_STRING,
'title' => ts('Contact Type') ,
'description' => 'The type of contacts this group applies to',
'maxlength' => 12,
'size' => CRM_Utils_Type::TWELVE,
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
'html' => array(
'type' => 'Select',
) ,
'pseudoconstant' => array(
'table' => 'civicrm_contact_type',
'keyColumn' => 'name',
'labelColumn' => 'label',
'condition' => 'parent_id IS NULL',
)
) ,
'threshold' => array(
'name' => 'threshold',
'type' => CRM_Utils_Type::T_INT,
'title' => ts('Threshold') ,
'description' => 'The weight threshold the sum of the rule weights has to cross to consider two contacts the same',
'required' => true,
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
'html' => array(
'type' => 'Text',
) ,
) ,
// NOTE(review): the title 'Length' below does not describe a usage field
// and looks copy-pasted from another field - confirm, and if wrong fix it
// in xml/schema/CRM/Dedupe/RuleGroup.xml and regenerate.
'used' => array(
'name' => 'used',
'type' => CRM_Utils_Type::T_STRING,
'title' => ts('Length') ,
'description' => 'Whether the rule should be used for cases where usage is Unsupervised, Supervised OR General(programatically)',
'required' => true,
'maxlength' => 12,
'size' => CRM_Utils_Type::TWELVE,
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
'html' => array(
'type' => 'Radio',
) ,
'pseudoconstant' => array(
'callback' => 'CRM_Core_SelectValues::getDedupeRuleTypes',
)
) ,
'name' => array(
'name' => 'name',
'type' => CRM_Utils_Type::T_STRING,
'title' => ts('Name') ,
'description' => 'Name of the rule group',
'maxlength' => 64,
'size' => CRM_Utils_Type::BIG,
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
) ,
'title' => array(
'name' => 'title',
'type' => CRM_Utils_Type::T_STRING,
'title' => ts('Title') ,
'description' => 'Label of the rule group',
'maxlength' => 255,
'size' => CRM_Utils_Type::HUGE,
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
'html' => array(
'type' => 'Text',
) ,
) ,
'is_reserved' => array(
'name' => 'is_reserved',
'type' => CRM_Utils_Type::T_BOOLEAN,
'title' => ts('Reserved?') ,
'description' => 'Is this a reserved rule - a rule group that has been optimized and cannot be changed by the admin',
'table_name' => 'civicrm_dedupe_rule_group',
'entity' => 'RuleGroup',
'bao' => 'CRM_Dedupe_BAO_RuleGroup',
'localizable' => 0,
'html' => array(
'type' => 'CheckBox',
) ,
) ,
);
CRM_Core_DAO_AllCoreTables::invoke(__CLASS__, 'fields_callback', Civi::$statics[__CLASS__]['fields']);
}
return Civi::$statics[__CLASS__]['fields'];
}
/**
* Return a mapping from field-name to the corresponding key (as used in fields()).
*
* Computed once by flipping the 'name' entries collected from fields(), then
* cached in Civi::$statics.
*
* @return array
* Array(string $name => string $uniqueName).
*/
static function &fieldKeys() {
if (!isset(Civi::$statics[__CLASS__]['fieldKeys'])) {
Civi::$statics[__CLASS__]['fieldKeys'] = array_flip(CRM_Utils_Array::collect('name', self::fields()));
}
return Civi::$statics[__CLASS__]['fieldKeys'];
}
/**
* Returns the name of this table.
*
* @return string
*/
static function getTableName() {
return self::$_tableName;
}
/**
* Returns whether modifications to this table should be logged (the static
* $_log flag).
*
* @return boolean
*/
function getLog() {
return self::$_log;
}
/**
* Returns the list of fields that can be imported.
*
* Delegates to CRM_Core_DAO_AllCoreTables::getImports().
*
* @param bool $prefix
* Passed through unchanged to getImports().
*
* @return array
*/
static function &import($prefix = false) {
$r = CRM_Core_DAO_AllCoreTables::getImports(__CLASS__, 'dedupe_rule_group', $prefix, array());
return $r;
}
/**
* Returns the list of fields that can be exported.
*
* Delegates to CRM_Core_DAO_AllCoreTables::getExports().
*
* @param bool $prefix
* Passed through unchanged to getExports().
*
* @return array
*/
static function &export($prefix = false) {
$r = CRM_Core_DAO_AllCoreTables::getExports(__CLASS__, 'dedupe_rule_group', $prefix, array());
return $r;
}
/**
* Returns the list of indices.
*
* No indices are declared here, so the result is always an empty array and
* multilingualize() is never reached.
*
* @param bool $localize
* Only relevant when the index list is non-empty.
*
* @return array
*/
public static function indices($localize = TRUE) {
$indices = array();
return ($localize && !empty($indices)) ? CRM_Core_DAO_AllCoreTables::multilingualize(__CLASS__, $indices) : $indices;
}
}

View file

@ -0,0 +1,383 @@
<?php
/*
+--------------------------------------------------------------------+
| CiviCRM version 4.7 |
+--------------------------------------------------------------------+
| Copyright CiviCRM LLC (c) 2004-2017 |
+--------------------------------------------------------------------+
| This file is a part of CiviCRM. |
| |
| CiviCRM is free software; you can copy, modify, and distribute it |
| under the terms of the GNU Affero General Public License |
| Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
| |
| CiviCRM is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| See the GNU Affero General Public License for more details. |
| |
| You should have received a copy of the GNU Affero General Public |
| License and the CiviCRM Licensing Exception along |
| with this program; if not, contact CiviCRM LLC |
| at info[AT]civicrm[DOT]org. If you have questions about the |
| GNU Affero General Public License or the licensing of CiviCRM, |
| see the CiviCRM license FAQ at http://civicrm.org/licensing |
+--------------------------------------------------------------------+
*/
/**
*
* @package CRM
* @copyright CiviCRM LLC (c) 2004-2017
* $Id$
*
*/
/**
* The CiviCRM duplicate discovery engine is based on an
* algorithm designed by David Strauss <david@fourkitchens.com>.
*/
class CRM_Dedupe_Finder {
/**
 * Find possible duplicate pairs for a rule group, optionally limited to
 * pairs involving the given contacts.
 *
 * @param int $rgid
 *   Rule group id.
 * @param array $cids
 *   Contact ids to limit the search to.
 * @param bool $checkPermissions
 *   Respect logged in user permissions.
 * @param int $searchLimit
 *   Limit for the number of contacts to be used for comparison.
 *   The search methodology finds all matches for the searched contacts, so
 *   this limits the number of searched contacts, not the matches found.
 *   Only applied when no contact ids are passed in.
 *
 * @return array
 *   List of array(cid1, cid2, weight) dupe triples.
 *
 * @throws CiviCRM_API3_Exception
 * @throws Exception
 */
public static function dupes($rgid, $cids = array(), $checkPermissions = TRUE, $searchLimit = 0) {
  $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
  $ruleGroup->id = $rgid;
  $ruleGroup->contactIds = $cids;
  if (!$ruleGroup->find(TRUE)) {
    CRM_Core_Error::fatal("Dedupe rule not found for selected contacts");
  }

  // Without an explicit contact list, an optional limit restricts the search
  // to the first $searchLimit contacts of the rule group's contact type.
  $useSearchLimit = empty($ruleGroup->contactIds) && !empty($searchLimit);
  if ($useSearchLimit) {
    $apiParams = array(
      'return' => 'id',
      'contact_type' => $ruleGroup->contact_type,
      'options' => array('limit' => $searchLimit),
    );
    $limitedContacts = civicrm_api3('Contact', 'get', $apiParams);
    $ruleGroup->contactIds = array_keys($limitedContacts['values']);
  }

  $ruleGroup->fillTable();

  $result = new CRM_Core_DAO();
  $result->query($ruleGroup->thresholdQuery($checkPermissions));
  $pairs = array();
  while ($result->fetch()) {
    $pairs[] = array($result->id1, $result->id2, $result->weight);
  }

  // Clean up the working table created by fillTable().
  $result->query($ruleGroup->tableDropQuery());
  return $pairs;
}
/**
 * Return the ids of contacts that look like duplicates of the supplied
 * values, using either the given rule group or the default rule group for
 * the contact type and usage.
 *
 * $params may carry a 'check_permission' flag (default TRUE). Public pages
 * might not want permissions to be checked for anonymous users, refer
 * CRM-6211. We might be breaking Multi-Site dedupe for public pages.
 *
 * @param array $params
 *   Array of params of the form $params[$table][$field] == $value.
 * @param string $ctype
 *   Contact type to match against.
 * @param string $used
 *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
 * @param array $except
 *   Array of contacts that shouldn't be considered dupes.
 * @param int $ruleGroupID
 *   The id of the dedupe rule we should be using.
 *
 * @return array
 *   matching contact ids
 */
public static function dupesByParams(
  $params,
  $ctype,
  $used = 'Unsupervised',
  $except = array(),
  $ruleGroupID = NULL
) {
  // Nothing to match on, nothing to find.
  if (!$params) {
    return array();
  }

  // Prefer the explicitly requested rule group when it exists for this
  // contact type ...
  $rgBao = NULL;
  if ($ruleGroupID) {
    $requested = new CRM_Dedupe_BAO_RuleGroup();
    $requested->id = $ruleGroupID;
    $requested->contact_type = $ctype;
    if ($requested->find(TRUE)) {
      $rgBao = $requested;
    }
  }
  // ... otherwise fall back to the rule group for the contact type and usage.
  if ($rgBao === NULL) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->contact_type = $ctype;
    $rgBao->used = $used;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("$used rule for $ctype does not exist");
    }
  }

  $params['check_permission'] = CRM_Utils_Array::value('check_permission', $params, TRUE);

  // Reduce the numeric phone to its digits before matching.
  if (isset($params['civicrm_phone']['phone_numeric'])) {
    $rawPhone = $params['civicrm_phone']['phone_numeric'];
    $params['civicrm_phone']['phone_numeric'] = preg_replace('/[^\d]/', '', $rawPhone);
  }

  $rgBao->params = $params;
  $rgBao->fillTable();

  $result = new CRM_Core_DAO();
  $result->query($rgBao->thresholdQuery($params['check_permission']));
  $matches = array();
  while ($result->fetch()) {
    if (!empty($result->id)) {
      $matches[] = $result->id;
    }
  }
  $result->query($rgBao->tableDropQuery());

  return array_diff($matches, $except);
}
/**
 * Find possible duplicate pairs among the members of a contact group.
 *
 * @param int $rgid
 *   Rule group id.
 * @param int $gid
 *   Contact group id (currently, works only with non-smart groups).
 * @param int $searchLimit
 *   Limit for the number of contacts to be used for comparison.
 *   The search methodology finds all matches for the searched contacts, so
 *   this limits the number of searched contacts, not the matches found.
 *
 * @return array
 *   List of array(cid1, cid2, weight) dupe triples; empty when the group
 *   yields no members.
 */
public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
  // NOTE(review): confirm that the second parameter of
  // CRM_Contact_BAO_Group::getMember() is the limit and not another flag.
  $members = CRM_Contact_BAO_Group::getMember($gid, $searchLimit);
  $memberIds = array_keys($members);
  return empty($memberIds) ? array() : self::dupes($rgid, $memberIds);
}
/**
 * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
 * object into a valid $params array for dedupe.
 *
 * The incoming structure is flattened, a handful of legacy field names are
 * renamed, dates and custom data are normalised, dotted keys and location
 * postfixes ("-5", "-Primary", "-Primary-5") are stripped, and finally only
 * the fields reported by CRM_Dedupe_BAO_RuleGroup::supportedFields() are
 * kept, grouped by table.
 *
 * @param array $fields
 *   Contact structure from formRule().
 * @param string $ctype
 *   Contact type of the given contact.
 *
 * @return array
 *   valid $params array for dedupe, of the form $params[$table][$field].
 */
public static function formatParams($fields, $ctype) {
  $flat = array();
  CRM_Utils_Array::flatten($fields, $flat);

  // FIXME: This may no longer be necessary - check inputs
  $replace_these = array(
    'individual_prefix' => 'prefix_id',
    'individual_suffix' => 'suffix_id',
    'gender' => 'gender_id',
  );
  foreach (array('individual_suffix', 'individual_prefix', 'gender') as $name) {
    if (!empty($fields[$name])) {
      $flat[$replace_these[$name]] = $flat[$name];
      unset($flat[$name]);
    }
  }

  // handle {birth,deceased}_date
  foreach (array(
    'birth_date',
    'deceased_date',
  ) as $date) {
    if (!empty($fields[$date])) {
      $flat[$date] = $fields[$date];
      if (is_array($flat[$date])) {
        $flat[$date] = CRM_Utils_Date::format($flat[$date]);
      }
      $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
    }
  }

  if (!empty($flat['contact_source'])) {
    $flat['source'] = $flat['contact_source'];
    unset($flat['contact_source']);
  }

  // handle preferred_communication_method: keep the keys of the entries
  // whose value is '1', sorted, wrapped in value separators
  if (!empty($fields['preferred_communication_method'])) {
    $methods = array_intersect($fields['preferred_communication_method'], array('1'));
    $methods = array_keys($methods);
    sort($methods);
    if ($methods) {
      $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
    }
  }

  // handle custom data
  $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1);
  CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
  foreach ($tree as $key => $cg) {
    // only integer keys are treated as custom groups
    if (!is_int($key)) {
      continue;
    }
    foreach ($cg['fields'] as $cf) {
      $flat[$cf['column_name']] = CRM_Utils_Array::value('data', $cf['customValue']);
    }
  }

  // if the key is dotted, keep just the last part of it
  foreach ($flat as $key => $value) {
    if (substr_count($key, '.')) {
      $last = explode('.', $key);
      $last = array_pop($last);
      // make sure the first occurrence is kept, not the last
      if (!isset($flat[$last])) {
        $flat[$last] = $value;
      }
      unset($flat[$key]);
    }
  }

  // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
  // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
  // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
  foreach ($flat as $key => $value) {
    $matches = array();
    // "Primary-\d+" (not the character class "[\d+]", which only accepts a
    // single digit or a literal "+") so that multi-digit postfixes such as
    // "phone-Primary-12" are reduced to "phone" as well.
    if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
      // after dropping the unmatched (empty) groups, index 1 is the base name
      $return = array_values(array_filter($matches));
      $flat[$return[1]] = $value;
      unset($flat[$key]);
    }
  }

  $params = array();
  $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
  if (is_array($supportedFields)) {
    foreach ($supportedFields as $table => $tableFields) {
      if ($table == 'civicrm_address') {
        // for matching on civicrm_address fields, we also need the location_type_id
        $tableFields['location_type_id'] = '';
        // FIXME: we also need to do some hacking for id and name fields, see CRM-3902s comments
        $fixes = array(
          'address_name' => 'name',
          'country' => 'country_id',
          'state_province' => 'state_province_id',
          'county' => 'county_id',
        );
        foreach ($fixes as $orig => $target) {
          if (!empty($flat[$orig])) {
            $params[$table][$target] = $flat[$orig];
          }
        }
      }
      if ($table == 'civicrm_phone') {
        $fixes = array(
          'phone' => 'phone_numeric',
        );
        foreach ($fixes as $orig => $target) {
          if (!empty($flat[$orig])) {
            $params[$table][$target] = $flat[$orig];
          }
        }
      }
      foreach ($tableFields as $field => $title) {
        if (!empty($flat[$field])) {
          $params[$table][$field] = $flat[$field];
        }
      }
    }
  }
  return $params;
}
/**
 * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
 *
 * @param array $foundDupes
 *   List of array(cid1, cid2, weight) triples.
 * @param string $cacheKeyString
 *   Cache key the rows are stored under.
 *
 * @return array
 *   List of dupe pairs, each with the keys
 *   -srcID
 *   -srcName
 *   -dstID
 *   -dstName
 *   -weight
 *   -canMerge
 *   Empty when $foundDupes is empty (nothing is stored in that case).
 *
 * @throws CRM_Core_Exception
 */
public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
  $mainContacts = array();
  // Without pairs there is nothing to look up or store; bailing out here
  // also avoids building an invalid "IN ()" clause below.
  if (empty($foundDupes)) {
    return $mainContacts;
  }

  $cids = array();
  foreach ($foundDupes as $dupe) {
    $cids[$dupe[0]] = 1;
    $cids[$dupe[1]] = 1;
  }
  // Contact ids go straight into the SQL text, so force them to integers.
  $cidString = implode(', ', array_map('intval', array_keys($cids)));

  $dao = CRM_Core_DAO::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
  $displayNames = array();
  while ($dao->fetch()) {
    $displayNames[$dao->id] = $dao->display_name;
  }

  $userId = CRM_Core_Session::getLoggedInContactID();
  $escapedCacheKey = CRM_Core_DAO::escapeString($cacheKeyString);
  $values = array();
  foreach ($foundDupes as $dupes) {
    $srcID = $dupes[1];
    $dstID = $dupes[0];
    // The logged in user should never be the src (ie. the contact to be removed).
    if ($srcID == $userId) {
      $srcID = $dstID;
      $dstID = $userId;
    }

    $row = array(
      'dstID' => $dstID,
      // a contact missing from the lookup simply gets no name
      'dstName' => isset($displayNames[$dstID]) ? $displayNames[$dstID] : NULL,
      'srcID' => $srcID,
      'srcName' => isset($displayNames[$srcID]) ? $displayNames[$srcID] : NULL,
      'weight' => $dupes[2],
      'canMerge' => TRUE,
    );
    $mainContacts[] = $row;

    $data = CRM_Core_DAO::escapeString(serialize($row));
    $values[] = " ( 'civicrm_contact', " . (int) $dstID . ", " . (int) $srcID . ", '$escapedCacheKey', '$data' ) ";
  }

  CRM_Core_BAO_PrevNextCache::setItem($values);
  return $mainContacts;
}
}

File diff suppressed because it is too large Load diff