Commit 62e76ffc authored by John Fiala
Browse files

Issue #2223571 by jcfiala: Back-ported configurable domain support from 7.x-1.x...

Issue #2223571 by jcfiala: Back-ported configurable domain support from the 7.x-1.x version and added more TLDs to the default list.
parent 6d9890b5
......@@ -238,7 +238,8 @@ function link_cleanup_url($url, $protocol = "http") {
$protocol_match = preg_match("/^([a-z0-9][a-z0-9\.\-_]*:\/\/)/i", $url);
if (empty($protocol_match)) {
// But should there be? Add an automatic http:// if it starts with a domain name.
$domain_match = preg_match('/^(([a-z0-9]([a-z0-9\-_]*\.)+)('. LINK_DOMAINS .'|[a-z]{2}))/i', $url);
$link_domains = _link_domains();
$domain_match = preg_match('/^(([a-z0-9]([a-z0-9\-_]*\.)+)('. $link_domains .'|[a-z]{2}))/i', $url);
if (!empty($domain_match)) {
$url = $protocol ."://". $url;
}
......@@ -329,10 +330,11 @@ function link_validate_url($text) {
"ß", // ß
)), ENT_QUOTES, 'UTF-8');
$allowed_protocols = variable_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal'));
$link_domains = _link_domains();
$protocol = '(('. implode("|", $allowed_protocols) .'):\/\/)';
$authentication = '(([a-z0-9%' . $LINK_ICHARS . ']+(:[a-z0-9%'. $LINK_ICHARS . '!]*)?)?@)';
$domain = '(([a-z0-9' . $LINK_ICHARS_DOMAIN . ']([a-z0-9'. $LINK_ICHARS_DOMAIN . '\-_\[\]])*)(\.(([a-z0-9' . $LINK_ICHARS_DOMAIN . '\-_\[\]])+\.)*('. LINK_DOMAINS .'|[a-z]{2}))?)';
$domain = '(([a-z0-9' . $LINK_ICHARS_DOMAIN . ']([a-z0-9'. $LINK_ICHARS_DOMAIN . '\-_\[\]])*)(\.(([a-z0-9' . $LINK_ICHARS_DOMAIN . '\-_\[\]])+\.)*('. $link_domains .'|[a-z]{2}))?)';
$ipv4 = '([0-9]{1,3}(\.[0-9]{1,3}){3})';
$ipv6 = '([0-9a-fA-F]{1,4}(\:[0-9a-fA-F]{1,4}){7})';
$port = '(:([0-9]{1,5}))';
......
......@@ -10,7 +10,7 @@ define('LINK_INTERNAL', 'internal');
define('LINK_FRONT', 'front');
define('LINK_EMAIL', 'email');
define('LINK_NEWS', 'news');
define('LINK_DOMAINS', 'aero|arpa|asia|biz|com|cat|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|mobi|local');
define('LINK_DOMAINS', 'aero|arpa|asia|biz|build|com|cat|ceo|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|post|pro|tel|travel|mobi|local|xxx');
// There are many other characters which are legal other than simply a-z - this includes them.
// html_entity_decode() is buggy in php 4 - we'll put it back here for D7 when 5.x is assumed.
/*define('LINK_ICHARS', (string) html_entity_decode(implode("", array(
......@@ -499,3 +499,11 @@ function _link_content_generate($node, $field) {
'attributes' => array(),
);
}
/**
 * Returns the allowed top-level domains as a '|'-separated regex fragment.
 *
 * The result always starts with the built-in LINK_DOMAINS list. Any extra
 * domains stored in the 'link_extra_domains' variable (set by admins via
 * variable_set() or a configuration override) are appended to it.
 *
 * @return
 *   A string such as 'aero|arpa|...|xxx|frog', intended to be embedded inside
 *   a regular-expression group by the caller.
 */
function _link_domains() {
  // Cast so that a single domain stored as a plain string still works instead
  // of making implode() fail on a non-array argument.
  $link_extra_domains = (array) variable_get('link_extra_domains', array());

  $extra = array();
  foreach ($link_extra_domains as $domain) {
    // Only string entries can be meaningful TLDs.
    if (!is_string($domain)) {
      continue;
    }
    $domain = trim($domain);
    // Skip blank entries: an empty alternative ("com||frog") matches the empty
    // string, which would make the surrounding pattern accept any host that
    // merely ends in a dot.
    if ($domain !== '') {
      $extra[] = $domain;
    }
  }

  return empty($extra) ? LINK_DOMAINS : LINK_DOMAINS . '|' . implode('|', $extra);
}
......@@ -287,6 +287,12 @@ class LinkValidateTest extends LinkValidateTestCase {
function test_link_ftp() {
  // Run an ftp:// URL through the shared URL validation helper.
  $ftp_url = 'ftp://www.example.com/';
  $this->link_test_validate_url($ftp_url);
}
// Validate that a URL whose TLD was registered through the
// 'link_extra_domains' variable can be used.
function test_link_custom_tld() {
  // Register the custom TLD before the URL is validated.
  $extra_tlds = array('frog');
  variable_set('link_extra_domains', $extra_tlds);

  $custom_tld_url = 'http://www.example.frog/';
  $this->link_test_validate_url($custom_tld_url);
}
}
class LinkValidateTestNews extends LinkValidateTestCase {
......
......@@ -87,16 +87,17 @@ class link_views_handler_filter_protocol extends views_handler_filter_string {
// More complex case, no protocol specified but is automatically cleaned up
// by link_cleanup_url(). RegEx is required for this search operation.
if ($protocol == 'http') {
$link_domains = _link_domains();
if ($db_type == 'pgsql') {
// PostGreSQL code has NOT been tested. Please report any problems to the link issue queue.
// pgSQL requires all slashes to be double escaped in regular expressions.
// See http://www.postgresql.org/docs/8.1/static/functions-matching.html#FUNCTIONS-POSIX-REGEXP
$condition .= ' OR '. $field .' ~* \''.'^(([a-z0-9]([a-z0-9\\-_]*\\.)+)('. LINK_DOMAINS .'|[a-z][a-z]))'.'\'';
$condition .= ' OR '. $field .' ~* \''.'^(([a-z0-9]([a-z0-9\\-_]*\\.)+)('. $link_domains .'|[a-z][a-z]))'.'\'';
}
else {
// mySQL requires backslashes to be double (triple?) escaped within character classes.
// See http://dev.mysql.com/doc/refman/5.0/en/string-comparison-functions.html#operator_regexp
$condition .= ' OR '. $field .' REGEXP \''.'^(([a-z0-9]([a-z0-9\\\-_]*\.)+)('. LINK_DOMAINS .'|[a-z][a-z]))'.'\'';
$condition .= ' OR '. $field .' REGEXP \''.'^(([a-z0-9]([a-z0-9\\\-_]*\.)+)('. $link_domains .'|[a-z][a-z]))'.'\'';
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment