Commit 1d18663b authored by Alex Vandiver
Browse files

Standardize on the stricter Encode::encode("UTF-8", ...) everywhere

This is not only for code consistency, but also for consistency of
output.  Encode::encode_utf8(...) is equivalent to
Encode::encode("utf8",...) which is the non-"strict" form of UTF-8.
Strict UTF-8 encoding differs in that (from `perldoc Encode`):

    ...its range is much narrower (0 ..  0x10_FFFF to cover only 21 bits
    instead of 32 or 64 bits) and some sequences are not allowed, like
    those used in surrogate pairs, the 31 non-character code points
    0xFDD0 .. 0xFDEF, the last two code points in any plane (0xXX_FFFE
    and 0xXX_FFFF), all non-shortest encodings, etc.

RT deals with interchange with databases, email, and other systems.  In
dealing with encodings, it should ensure that it does not produce byte
sequences that are invalid according to official Unicode standards.
parent ba110857
......@@ -1039,7 +1039,7 @@ sub SetSubjectToken {
$self->SetHeader(
Subject =>
RT::Interface::Email::AddSubjectTag(
Encode::decode_utf8( $head->get('Subject') ),
Encode::decode( "UTF-8", $head->get('Subject') ),
$self->TicketObj,
),
);
......
......@@ -419,16 +419,16 @@ sub BuildEmail {
);
my $entity = MIME::Entity->build(
From => Encode::encode_utf8($args{From}),
To => Encode::encode_utf8($args{To}),
From => Encode::encode("UTF-8", $args{From}),
To => Encode::encode("UTF-8", $args{To}),
Subject => RT::Interface::Email::EncodeToMIME( String => $args{Subject} ),
Type => "multipart/mixed",
);
$entity->attach(
Data => Encode::encode_utf8($content),
Type => 'text/html',
Charset => 'UTF-8',
Data => Encode::encode("UTF-8", $content),
Disposition => 'inline',
Encoding => "base64",
);
......
......@@ -588,7 +588,7 @@ sub SendEmailUsingTemplate {
return -1;
}
$mail->head->replace( $_ => Encode::encode_utf8( $args{ $_ } ) )
$mail->head->replace( $_ => Encode::encode( "UTF-8", $args{ $_ } ) )
foreach grep defined $args{$_}, qw(To Cc Bcc From);
$mail->head->replace( $_ => Encode::encode( "UTF-8", $args{ExtraHeaders}{$_} ) )
......@@ -1073,8 +1073,8 @@ sub SetInReplyTo {
if @references > 10;
my $mail = $args{'Message'};
$mail->head->replace( 'In-Reply-To' => Encode::encode_utf8(join ' ', @rtid? (@rtid) : (@id)) ) if @id || @rtid;
$mail->head->replace( 'References' => Encode::encode_utf8(join ' ', @references) );
$mail->head->replace( 'In-Reply-To' => Encode::encode( "UTF-8", join ' ', @rtid? (@rtid) : (@id)) ) if @id || @rtid;
$mail->head->replace( 'References' => Encode::encode( "UTF-8", join ' ', @references) );
}
sub PseudoReference {
......
......@@ -1578,8 +1578,12 @@ sub StoreRequestToken {
if ($ARGS->{Attach}) {
my $attachment = HTML::Mason::Commands::MakeMIMEEntity( AttachmentFieldName => 'Attach' );
my $file_path = delete $ARGS->{'Attach'};
# This needs to be decoded because the value is a reference;
# hence it was not decoded along with all of the standard
# arguments in DecodeARGS
$data->{attach} = {
filename => Encode::decode_utf8("$file_path"),
filename => Encode::decode("UTF-8", "$file_path"),
mime => $attachment,
};
}
......@@ -2295,7 +2299,7 @@ sub ProcessUpdateMessage {
Interface => RT::Interface::Web::MobileClient() ? 'Mobile' : 'Web',
);
$Message->head->replace( 'Message-ID' => Encode::encode_utf8(
$Message->head->replace( 'Message-ID' => Encode::encode( "UTF-8",
RT::Interface::Email::GenMessageId( Ticket => $args{'TicketObj'} )
) );
my $old_txn = RT::Transaction->new( $session{'CurrentUser'} );
......@@ -2429,7 +2433,10 @@ sub ProcessAttachments {
AttachmentFieldName => 'Attach'
);
my $file_path = Encode::decode_utf8("$new");
# This needs to be decoded because the value is a reference;
# hence it was not decoded along with all of the standard
# arguments in DecodeARGS
my $file_path = Encode::decode( "UTF-8", "$new");
$session{'Attachments'}{ $token }{ $file_path } = $attachment;
$update_session = 1;
......@@ -2463,9 +2470,9 @@ sub MakeMIMEEntity {
);
my $Message = MIME::Entity->build(
Type => 'multipart/mixed',
"Message-Id" => Encode::encode_utf8( RT::Interface::Email::GenMessageId ),
"Message-Id" => Encode::encode( "UTF-8", RT::Interface::Email::GenMessageId ),
"X-RT-Interface" => $args{Interface},
map { $_ => Encode::encode_utf8( $args{ $_} ) }
map { $_ => Encode::encode( "UTF-8", $args{ $_} ) }
grep defined $args{$_}, qw(Subject From Cc)
);
......
......@@ -253,7 +253,7 @@ use Plack::Builder;
use Plack::Request;
use Plack::Response;
use Plack::Util;
use Encode qw(encode_utf8);
use Encode;
sub PSGIApp {
my $self = shift;
......@@ -389,7 +389,7 @@ sub _psgi_response_cb {
$cleanup->();
return '';
}
return utf8::is_utf8($_[0]) ? encode_utf8($_[0]) : $_[0];
return utf8::is_utf8($_[0]) ? Encode::encode( "UTF-8", $_[0]) : $_[0];
return $_[0];
};
});
......
......@@ -88,7 +88,8 @@ sub Create {
my ($val, $msg) = $cf->_CanonicalizeValue(\%args);
return ($val, $msg) unless $val;
if ( defined $args{'Content'} && length( Encode::encode_utf8($args{'Content'}) ) > 255 ) {
my $encoded = Encode::encode("UTF-8", $args{'Content'});
if ( defined $args{'Content'} && length( $encoded ) > 255 ) {
if ( defined $args{'LargeContent'} && length $args{'LargeContent'} ) {
$RT::Logger->error("Content is longer than 255 bytes and LargeContent specified");
}
......
......@@ -680,7 +680,7 @@ sub _LimitCustomField {
my $single_value = !blessed($cf) || $cf->SingleValue;
my $negative_op = ($op eq '!=' || $op =~ /\bNOT\b/i);
my $value_is_long = (length( Encode::encode_utf8($value)) > 255) ? 1 : 0;
my $value_is_long = (length( Encode::encode( "UTF-8", $value)) > 255) ? 1 : 0;
$cfkey .= '.'. $self->{'_sql_multiple_cfs_index'}++
if not $single_value and $op =~ /^(!?=|(NOT )?LIKE)$/i;
......
......@@ -452,8 +452,8 @@ sub _Parse {
### Should we forgive normally-fatal errors?
$parser->ignore_errors(1);
# MIME::Parser doesn't play well with perl strings
utf8::encode($content);
# Always provide bytes, not characters, to MIME objects
$content = Encode::encode( 'UTF-8', $content );
$self->{'MIMEObj'} = eval { $parser->parse_data( \$content ) };
if ( my $error = $@ || $parser->last_error ) {
$RT::Logger->error( "$error" );
......@@ -675,8 +675,7 @@ sub _DowngradeFromHTML {
require Encode;
my $body = $new_entity->bodyhandle->as_string;
# The body is UTF-8 bytes and must be decoded to characters; see the doc of the MIMEObj method
$body = Encode::decode_utf8( $body );
$body = Encode::decode( "UTF-8", $body );
my $html = RT::Interface::Email::ConvertHTMLToText( $body );
$html = Encode::encode( "UTF-8", $html );
return unless defined $html;
......
......@@ -1590,7 +1590,7 @@ sub _RecordNote {
my $addresses = join ', ', (
map { RT::User->CanonicalizeEmailAddress( $_->address ) }
Email::Address->parse( $args{ $type . 'MessageTo' } ) );
$args{'MIMEObj'}->head->replace( 'RT-Send-' . $type, Encode::encode_utf8( $addresses ) );
$args{'MIMEObj'}->head->replace( 'RT-Send-' . $type, Encode::encode( "UTF-8", $addresses ) );
}
}
......@@ -1607,7 +1607,7 @@ sub _RecordNote {
my $msgid = Encode::decode( "UTF-8", $args{'MIMEObj'}->head->get('Message-ID') );
unless (defined $msgid && $msgid =~ /<(rt-.*?-\d+-\d+)\.(\d+-0-0)\@\Q$org\E>/) {
$args{'MIMEObj'}->head->replace(
'RT-Message-ID' => Encode::encode_utf8(
'RT-Message-ID' => Encode::encode( "UTF-8",
RT::Interface::Email::GenMessageId( Ticket => $self )
)
);
......@@ -1656,8 +1656,8 @@ sub DryRun {
}
my $Message = MIME::Entity->build(
Subject => defined $args{UpdateSubject} ? Encode::encode( "UTF-8", $args{UpdateSubject} ) : "",
Type => 'text/plain',
Subject => defined $args{UpdateSubject} ? Encode::encode_utf8( $args{UpdateSubject} ) : "",
Charset => 'UTF-8',
Data => Encode::encode("UTF-8", $args{'UpdateContent'} || ""),
);
......@@ -1689,9 +1689,9 @@ sub DryRunCreate {
my $self = shift;
my %args = @_;
my $Message = MIME::Entity->build(
Subject => defined $args{Subject} ? Encode::encode_utf8( $args{'Subject'} ) : "",
Subject => defined $args{Subject} ? Encode::encode( "UTF-8", $args{'Subject'} ) : "",
(defined $args{'Cc'} ?
( Cc => Encode::encode_utf8( $args{'Cc'} ) ) : ()),
( Cc => Encode::encode( "UTF-8", $args{'Cc'} ) ) : ()),
Type => 'text/plain',
Charset => 'UTF-8',
Data => Encode::encode( "UTF-8", $args{'Content'} || ""),
......
......@@ -897,7 +897,7 @@ sub _GeneratePassword_bcrypt {
key_nul => 1,
cost => $rounds,
salt => $salt,
}, Digest::SHA::sha512( encode_utf8($password) ) );
}, Digest::SHA::sha512( Encode::encode( 'UTF-8', $password) ) );
return join("!", "", "bcrypt", sprintf("%02d", $rounds),
Crypt::Eksblowfish::Bcrypt::en_base64( $salt ).
......@@ -918,7 +918,7 @@ sub _GeneratePassword_sha512 {
my $sha = Digest::SHA->new(512);
$sha->add($salt);
$sha->add(encode_utf8($password));
$sha->add(Encode::encode( 'UTF-8', $password));
return join("!", "", "sha512", $salt, $sha->b64digest);
}
......@@ -999,16 +999,16 @@ sub IsPassword {
my $hash = MIME::Base64::decode_base64($stored);
# Decoding yields 30 bytes; first 4 are the salt, the rest are substr(SHA256,0,26)
my $salt = substr($hash, 0, 4, "");
return 0 unless substr(Digest::SHA::sha256($salt . Digest::MD5::md5(encode_utf8($value))), 0, 26) eq $hash;
return 0 unless substr(Digest::SHA::sha256($salt . Digest::MD5::md5(Encode::encode( "UTF-8", $value))), 0, 26) eq $hash;
} elsif (length $stored == 32) {
# Hex nonsalted-md5
return 0 unless Digest::MD5::md5_hex(encode_utf8($value)) eq $stored;
return 0 unless Digest::MD5::md5_hex(Encode::encode( "UTF-8", $value)) eq $stored;
} elsif (length $stored == 22) {
# Base64 nonsalted-md5
return 0 unless Digest::MD5::md5_base64(encode_utf8($value)) eq $stored;
return 0 unless Digest::MD5::md5_base64(Encode::encode( "UTF-8", $value)) eq $stored;
} elsif (length $stored == 13) {
# crypt() output
return 0 unless crypt(encode_utf8($value), $stored) eq $stored;
return 0 unless crypt(Encode::encode( "UTF-8", $value), $stored) eq $stored;
} else {
$RT::Logger->warning("Unknown password form");
return 0;
......@@ -1097,8 +1097,7 @@ sub GenerateAuthString {
my $self = shift;
my $protect = shift;
my $str = $self->AuthToken . $protect;
utf8::encode($str);
my $str = Encode::encode( "UTF-8", $self->AuthToken . $protect );
return substr(Digest::MD5::md5_hex($str),0,16);
}
......@@ -1115,8 +1114,7 @@ sub ValidateAuthString {
my $auth_string = shift;
my $protected = shift;
my $str = $self->AuthToken . $protected;
utf8::encode( $str );
my $str = Encode::encode( "UTF-8", $self->AuthToken . $protected );
return $auth_string eq substr(Digest::MD5::md5_hex($str),0,16);
}
......
......@@ -72,7 +72,7 @@ my $col_entry = sub {
delete $col->{title}
if $col->{title} and $col->{title} =~ /^\s*#\s*$/;
return {
header => Encode::encode_utf8(loc($col->{title} || $col->{attribute})),
header => Encode::encode( "UTF-8", loc($col->{title} || $col->{attribute}) ),
map => $m->comp(
"/Elements/ColumnMap",
Name => $col->{attribute},
......@@ -116,7 +116,7 @@ while (my $row = $Collection->Next) {
$val =~ s/(?:\n|\r)+/ /g; $val =~ s{\t}{ }g;
$val = $no_html->scrub($val);
$val = HTML::Entities::decode_entities($val);
Encode::encode_utf8($val);
Encode::encode( "UTF-8", $val);
} @$col)."\n");
}
}
......
......@@ -62,8 +62,8 @@ $notfound->() unless $path =~ m!^([^/]+)/([^/]+)/(.*)(\.(ical|ics))?!;
my ($name, $auth, $search) = ($1, $2, $3);
# Unescape parts
$_ =~ s/\%([0-9a-z]{2})/chr(hex($1))/gei for $name, $search;
# convert to perl strings
$_ = Encode::decode_utf8( $_ ) for $name, $search;
# Decode from bytes to characters
$_ = Encode::decode( "UTF-8", $_ ) for $name, $search;
my $user = RT::User->new( RT->SystemUser );
$user->Load( $name );
......
......@@ -67,8 +67,8 @@ if ( $m->request_comp->path =~ RT->Config->Get('WebNoAuthRegex') ) {
# Unescape parts
$name =~ s/\%([0-9a-z]{2})/chr(hex($1))/gei;
# convert to perl strings
$name = Encode::decode_utf8($name);
# Decode from bytes to characters
$name = Encode::decode( "UTF-8", $name );
my $user = RT::User->new(RT->SystemUser);
$user->Load($name);
......
......@@ -46,7 +46,7 @@
%#
%# END BPS TAGGED BLOCK }}}
<div><img src="<% RT->Config->Get('WebPath') %>/Ticket/Graphs/<% $id %>?<% $m->comp('/Elements/QueryString', %ARGS) %>" usemap="#<% $graph->{'NAME'} || 'test' %>" style="border: none" />
<% safe_run_child { Encode::decode_utf8( $graph->as_cmapx ) } |n %>
<% safe_run_child { Encode::decode( "UTF-8", $graph->as_cmapx ) } |n %>
</div>
<& ShowLegends, %ARGS, Ticket => $ticket &>
<%ARGS>
......
......@@ -81,7 +81,7 @@ $hideable = 1 if $rolledup;
#
my $page = $m->request_comp->path;
my $title_b64 = MIME::Base64::encode_base64(Encode::encode_utf8($title), '');
my $title_b64 = MIME::Base64::encode_base64(Encode::encode( "UTF-8", $title), '');
my $tid = "TitleBox--$page--" .
join '--', ($class, $bodyclass, $title_b64, $id);
......
......@@ -20,12 +20,12 @@ use HTML::Mason;
use HTML::Mason::Compiler;
use HTML::Mason::Compiler::ToObject;
BEGIN { require RT::Test; }
use Encode qw(decode_utf8);
use Encode;
sub compile_file {
my $file = shift;
my $text = decode_utf8(RT::Test->file_content($file));
my $text = Encode::decode( "UTF-8", RT::Test->file_content($file));
my $compiler = new HTML::Mason::Compiler::ToObject;
$compiler->compile(
......
......@@ -61,7 +61,7 @@ is ($#headers, 2, "testing a bunch of singline multiple headers" );
require Encode;
is(
Encode::decode( 'iso-8859-1', $mime->stringify_body ),
Encode::decode( 'utf8', "Håvard\n" ),
Encode::decode( 'UTF-8', "Håvard\n" ),
'body of ContentAsMIME is original'
);
}
......@@ -22,7 +22,7 @@ for my $charset ( keys %map ) {
my $mime = MIME::Entity->build(
Type => 'text/plain; charset=gb2312',
Data => [encode('gbk', decode_utf8("法新社倫敦11日電"))],
Data => [Encode::encode("gbk", Encode::decode( "UTF-8", "法新社倫敦11日電"))],
);
RT::I18N::SetMIMEEntityToUTF8($mime);
......
......@@ -55,7 +55,7 @@ like($root->__Value("Password"), qr/^\!$default\!/, "And is now upgraded to salt
# Non-ASCII salted truncated SHA-256
my $non_ascii_trunc = MIME::Base64::encode_base64(
"salt" . substr(Digest::SHA::sha256("salt".Digest::MD5::md5(encode_utf8("áěšý"))),0,26),
"salt" . substr(Digest::SHA::sha256("salt".Digest::MD5::md5(Encode::encode("UTF-8","áěšý"))),0,26),
""
);
$root->_Set( Field => "Password", Value => $non_ascii_trunc);
......
......@@ -72,7 +72,7 @@ foreach my $set ( 'ru', 'latin1' ) {
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$set}{test}/
or do { $status = 0; diag "wrong subject: $subject" };
}
......@@ -101,7 +101,7 @@ diag "ascii subject with non-ascii subject tag";
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$tag_set}{support}/
or do { $status = 0; diag "wrong subject: $subject" };
}
......@@ -122,7 +122,7 @@ foreach my $set ( 'ru', 'latin1' ) {
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$tag_set}{support}/
or do { $status = 0; diag "wrong subject: $subject" };
$subject =~ /$string{$set}{test}/
......@@ -171,7 +171,7 @@ diag "ascii subject with non-ascii subject prefix in template";
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$prefix_set}{autoreply}/
or do { $status = 0; diag "wrong subject: $subject" };
}
......@@ -192,7 +192,7 @@ foreach my $set ( 'ru', 'latin1' ) {
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$prefix_set}{autoreply}/
or do { $status = 0; diag "wrong subject: $subject" };
$subject =~ /$string{$set}{test}/
......@@ -222,7 +222,7 @@ foreach my $set ( 'ru', 'latin1' ) {
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$prefix_set}{autoreply}/
or do { $status = 0; diag "wrong subject: $subject" };
$subject =~ /$string{$tag_set}{support}/
......@@ -275,7 +275,7 @@ foreach my $set ( 'ru', 'latin1' ) {
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$set}{test}/
or do { $status = 0; diag "wrong subject: $subject" };
}
......@@ -303,7 +303,7 @@ foreach my $set ( 'ru', 'latin1' ) {
my $status = 1;
foreach my $mail ( @mails ) {
my $entity = parse_mail( $mail );
my $subject = Encode::decode_utf8( $entity->head->get('Subject') );
my $subject = Encode::decode( "UTF-8", $entity->head->get('Subject') );
$subject =~ /$string{$set}{test}/
or do { $status = 0; diag "wrong subject: $subject" };
$subject =~ /$string{$tag_set}{support}/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment