Commit 7894729d authored by Kevin Falcone's avatar Kevin Falcone
Browse files

Merge branch '4.2/utf8-reckoning' into 4.2-trunk

parents 2620658d af9fe7c4
......@@ -51,7 +51,6 @@
use strict;
use warnings;
use utf8;
use open qw/ :std :encoding(UTF-8) /;
use File::Find;
......@@ -194,7 +193,7 @@ sub extract_strings_from_code {
$seen{$line}++;
unless ( defined $str ) {
print "\n" unless $errors++;
print " Couldn't process loc at $filename:$line:\n str«$str»\n";
print " Couldn't process loc at $filename:$line:\n $str\n";
next;
}
my $interp = (substr($str,0,1) eq '"' ? 1 : 0);
......@@ -238,7 +237,7 @@ sub extract_strings_from_code {
$seen{$line}++;
unless ( defined $str ) {
print "\n" unless $errors++;
print " Couldn't process loc_qw at $filename:$line:\n str«$str»\n";
print " Couldn't process loc_qw at $filename:$line:\n $str\n";
next;
}
foreach my $value (split ' ', $str) {
......@@ -255,7 +254,7 @@ sub extract_strings_from_code {
$seen{$line}++;
unless ( defined $key ) {
print "\n" unless $errors++;
print " Couldn't process loc_left_pair at $filename:$line:\n key«$key»\n";
print " Couldn't process loc_left_pair at $filename:$line:\n $key\n";
next;
}
my $interp = (substr($key,0,1) eq '"' ? 1 : 0);
......@@ -272,7 +271,7 @@ sub extract_strings_from_code {
$seen{$line}++;
unless ( defined $key && defined $val ) {
print "\n" unless $errors++;
print " Couldn't process loc_pair at $filename:$line:\n key«$key»\n val«$val»\n";
print " Couldn't process loc_pair at $filename:$line:\n $key\n $val\n";
next;
}
my $interp_key = (substr($key,0,1) eq '"' ? 1 : 0);
......@@ -293,7 +292,7 @@ sub extract_strings_from_code {
$line += ( $all =~ tr/\n/\n/ );
$seen{$line}++;
unless ( defined $key && defined $val ) {
warn "Couldn't process loc_pair at $filename:$line:\n key«$key»\n val«$val»\n";
warn "Couldn't process loc_pair at $filename:$line:\n $key\n $val\n";
next;
}
$val = substr($val, 1, -1); # dequote always quoted string
......
......@@ -53,6 +53,7 @@ use 5.010;
package RT;
use Encode ();
use File::Spec ();
use Cwd ();
use Scalar::Util qw(blessed);
......@@ -263,6 +264,9 @@ sub InitLogging {
$frame++ while caller($frame) && caller($frame) =~ /^Log::/;
my ($package, $filename, $line) = caller($frame);
# Encode to bytes, so we don't send wide characters
$p{message} = Encode::encode("UTF-8", $p{message});
$p{'message'} =~ s/(?:\r*\n)+$//;
return "[$$] [". gmtime(time) ."] [". $p{'level'} ."]: "
. $p{'message'} ." ($filename:$line)\n";
......@@ -278,8 +282,8 @@ sub InitLogging {
$frame++ while caller($frame) && caller($frame) =~ /^Log::/;
my ($package, $filename, $line) = caller($frame);
# syswrite() cannot take utf8; turn it off here.
Encode::_utf8_off($p{message});
# Encode to bytes, so we don't send wide characters
$p{message} = Encode::encode("UTF-8", $p{message});
$p{message} =~ s/(?:\r*\n)+$//;
if ($p{level} eq 'debug') {
......@@ -369,19 +373,9 @@ sub InitSignalHandlers {
## mechanism (see above).
$SIG{__WARN__} = sub {
# The 'wide character' warnings has to be silenced for now, at least
# until HTML::Mason offers a sane way to process both raw output and
# unicode strings.
# use 'goto &foo' syntax to hide ANON sub from stack
if( index($_[0], 'Wide character in ') != 0 ) {
unshift @_, $RT::Logger, qw(level warning message);
goto &Log::Dispatch::log;
}
# Return value is used only by RT::Test to filter warnings from
# reaching the Test::NoWarnings catcher. If Log::Dispatch::log() ever
# starts returning 'IGNORE', we'll need to switch to something more
# clever. I don't expect that to happen.
return 'IGNORE';
unshift @_, $RT::Logger, qw(level warning message);
goto &Log::Dispatch::log;
};
#When we call die, trap it and log->crit with the value of the die.
......
......@@ -535,15 +535,11 @@ sub _ParseMultilineTemplate {
my %args = (@_);
my $template_id;
require Encode;
require utf8;
my ( $queue, $requestor );
$RT::Logger->debug("Line: ===");
foreach my $line ( split( /\n/, $args{'Content'} ) ) {
$line =~ s/\r$//;
$RT::Logger->debug( "Line: " . utf8::is_utf8($line)
? Encode::encode_utf8($line)
: $line );
$RT::Logger->debug( "Line: $line" );
if ( $line =~ /^===/ ) {
if ( $template_id && !$queue && $args{'Queue'} ) {
$self->{'templates'}->{$template_id}
......@@ -740,10 +736,10 @@ sub ParseLines {
);
if ( $args{content} ) {
my $mimeobj = MIME::Entity->new();
$mimeobj->build(
Type => $args{'contenttype'} || 'text/plain',
Data => $args{'content'}
my $mimeobj = MIME::Entity->build(
Type => $args{'contenttype'} || 'text/plain',
Charset => 'UTF-8',
Data => [ map {Encode::encode( "UTF-8", $_ )} @{$args{'content'}} ],
);
$ticketargs{MIMEObj} = $mimeobj;
$ticketargs{UpdateType} = $args{'updatetype'} || 'correspond';
......
......@@ -257,7 +257,7 @@ sub Bcc {
sub AddressesFromHeader {
my $self = shift;
my $field = shift;
my $header = $self->TemplateObj->MIMEObj->head->get($field);
my $header = Encode::decode("UTF-8",$self->TemplateObj->MIMEObj->head->get($field));
my @addresses = Email::Address->parse($header);
return (@addresses);
......@@ -276,7 +276,7 @@ sub SendMessage {
# ability to pass @_ to a 'post' routine.
my ( $self, $MIMEObj ) = @_;
my $msgid = $MIMEObj->head->get('Message-ID');
my $msgid = Encode::decode( "UTF-8", $MIMEObj->head->get('Message-ID') );
chomp $msgid;
$self->ScripActionObj->{_Message_ID}++;
......@@ -299,7 +299,7 @@ sub SendMessage {
my $success = $msgid . " sent ";
foreach (@EMAIL_RECIPIENT_HEADERS) {
my $recipients = $MIMEObj->head->get($_);
my $recipients = Encode::decode( "UTF-8", $MIMEObj->head->get($_) );
$success .= " $_: " . $recipients if $recipients;
}
......@@ -531,7 +531,7 @@ sub RecordOutgoingMailTransaction {
$type = 'EmailRecord';
}
my $msgid = $MIMEObj->head->get('Message-ID');
my $msgid = Encode::decode( "UTF-8", $MIMEObj->head->get('Message-ID') );
chomp $msgid;
my ( $id, $msg ) = $transaction->Create(
......@@ -643,7 +643,7 @@ sub DeferDigestRecipients {
# Have to get the list of addresses directly from the MIME header
# at this point.
$RT::Logger->debug( $self->TemplateObj->MIMEObj->head->as_string );
$RT::Logger->debug( Encode::decode( "UTF-8", $self->TemplateObj->MIMEObj->head->as_string ) );
foreach my $rcpt ( map { $_->address } $self->AddressesFromHeader($mailfield) ) {
next unless $rcpt;
my $user_obj = RT::User->new(RT->SystemUser);
......@@ -752,7 +752,7 @@ sub RemoveInappropriateRecipients {
# If there are no recipients, don't try to send the message.
# If the transaction has content and has the header RT-Squelch-Replies-To
my $msgid = $self->TemplateObj->MIMEObj->head->get('Message-Id');
my $msgid = Encode::decode( "UTF-8", $self->TemplateObj->MIMEObj->head->get('Message-Id') );
chomp $msgid;
if ( my $attachment = $self->TransactionObj->Attachments->First ) {
......@@ -967,7 +967,8 @@ sub GetFriendlyName {
=head2 SetHeader FIELD, VALUE
Set the FIELD of the current MIME object into VALUE.
Set the FIELD of the current MIME object into VALUE, which should be in
characters, not bytes. Returns the new header, in bytes.
=cut
......@@ -980,7 +981,7 @@ sub SetHeader {
chomp $field;
my $head = $self->TemplateObj->MIMEObj->head;
$head->fold_length( $field, 10000 );
$head->replace( $field, $val );
$head->replace( $field, Encode::encode( "UTF-8", $val ) );
return $head->get($field);
}
......@@ -1021,7 +1022,7 @@ sub SetSubject {
$subject =~ s/(\r\n|\n|\s)/ /g;
$self->SetHeader( 'Subject', Encode::encode_utf8( $subject ) );
$self->SetHeader( 'Subject', $subject );
}
......@@ -1037,11 +1038,9 @@ sub SetSubjectToken {
my $head = $self->TemplateObj->MIMEObj->head;
$self->SetHeader(
Subject =>
Encode::encode_utf8(
RT::Interface::Email::AddSubjectTag(
Encode::decode_utf8( $head->get('Subject') ),
$self->TicketObj,
),
RT::Interface::Email::AddSubjectTag(
Encode::decode( "UTF-8", $head->get('Subject') ),
$self->TicketObj,
),
);
}
......@@ -1130,7 +1129,8 @@ sub PseudoReference {
=head2 SetHeaderAsEncoding($field_name, $charset_encoding)
This routine converts the field into specified charset encoding.
This routine converts the field into specified charset encoding, then
applies the MIME-Header transfer encoding.
=cut
......@@ -1140,8 +1140,8 @@ sub SetHeaderAsEncoding {
my $head = $self->TemplateObj->MIMEObj->head;
my $value = $head->get( $field );
$value = $self->MIMEEncodeString( $value, $enc );
my $value = Encode::decode("UTF-8", $head->get( $field ));
$value = $self->MIMEEncodeString( $value, $enc ); # Returns bytes
$head->replace( $field, $value );
}
......@@ -1151,7 +1151,8 @@ sub SetHeaderAsEncoding {
Takes a perl string and optional encoding pass it over
L<RT::Interface::Email/EncodeToMIME>.
Basicly encode a string using B encoding according to RFC2047.
Basicly encode a string using B encoding according to RFC2047, returning
bytes.
=cut
......
......@@ -110,7 +110,7 @@ sub Prepare {
my $txn_attachment = $self->TransactionObj->Attachments->First;
for my $header (qw/From To Cc Bcc/) {
if ( $txn_attachment->GetHeader( $header ) ) {
$mime->head->replace( $header => $txn_attachment->GetHeader($header) );
$mime->head->replace( $header => Encode::encode( "UTF-8", $txn_attachment->GetHeader($header) ) );
}
}
......
......@@ -130,13 +130,12 @@ sub Create {
my $head = $Attachment->head;
# Get the subject
my $Subject = $head->get( 'subject', 0 );
my $Subject = Encode::decode( 'UTF-8', $head->get( 'subject' ) );
$Subject = '' unless defined $Subject;
chomp $Subject;
utf8::decode( $Subject ) unless utf8::is_utf8( $Subject );
#Get the Message-ID
my $MessageId = $head->get( 'Message-ID', 0 );
my $MessageId = Encode::decode( "UTF-8", $head->get( 'Message-ID' ) );
defined($MessageId) or $MessageId = '';
chomp ($MessageId);
$MessageId =~ s/^<(.*?)>$/$1/o;
......@@ -150,18 +149,15 @@ sub Create {
my $content;
unless ( $head->get('Content-Length') ) {
my $length = 0;
if ( defined $Attachment->bodyhandle ) {
$content = $Attachment->bodyhandle->as_string;
utf8::encode( $content ) if utf8::is_utf8( $content );
$length = length $content;
}
$head->replace( 'Content-Length' => $length );
$length = length $Attachment->bodyhandle->as_string
if defined $Attachment->bodyhandle;
$head->replace( 'Content-Length' => Encode::encode( "UTF-8", $length ) );
}
$head = $head->as_string;
# MIME::Head doesn't support perl strings well and can return
# octets which later will be double encoded in low-level code
utf8::decode( $head ) unless utf8::is_utf8( $head );
$head = Encode::decode( 'UTF-8', $head );
# If a message has no bodyhandle, that means that it has subparts (or appears to)
# and we should act accordingly.
......
......@@ -437,15 +437,17 @@ sub SignEncrypt {
$args{'Signer'} =
$self->UseKeyForSigning
|| do {
my $addr = (Email::Address->parse( $entity->head->get( 'From' ) ))[0];
$addr? $addr->address : undef
my ($addr) = map {Email::Address->parse( Encode::decode( "UTF-8", $_ ) )}
$entity->head->get( 'From' );
$addr ? $addr->address : undef
};
}
if ( $args{'Encrypt'} && !$args{'Recipients'} ) {
my %seen;
$args{'Recipients'} = [
grep $_ && !$seen{ $_ }++, map $_->address,
map Email::Address->parse( $entity->head->get( $_ ) ),
map Email::Address->parse( Encode::decode("UTF-8", $_ ) ),
map $entity->head->get( $_ ),
qw(To Cc Bcc)
];
}
......
......@@ -494,7 +494,8 @@ sub SignEncryptRFC3156 {
}
if ( $args{'Encrypt'} ) {
my @recipients = map $_->address,
map Email::Address->parse( $entity->head->get( $_ ) ),
map Email::Address->parse( Encode::decode( "UTF-8", $_ ) ),
map $entity->head->get( $_ ),
qw(To Cc Bcc);
my ($tmp_fh, $tmp_fn) = File::Temp::tempfile( UNLINK => 1 );
......
......@@ -220,7 +220,7 @@ sub SignEncrypt {
if ( $args{'Encrypt'} ) {
my %seen;
$args{'Recipients'} = [
grep !$seen{$_}++, map $_->address, map Email::Address->parse($_),
grep !$seen{$_}++, map $_->address, map Email::Address->parse(Encode::decode("UTF-8",$_)),
grep defined && length, map $entity->head->get($_), qw(To Cc Bcc)
];
}
......@@ -742,7 +742,8 @@ sub CheckIfProtected {
if ( $security_type eq 'encrypted' ) {
my $top = $args{'TopEntity'}->head;
$res{'Recipients'} = [grep defined && length, map $top->get($_), 'To', 'Cc'];
$res{'Recipients'} = [map {Encode::decode("UTF-8", $_)}
grep defined && length, map $top->get($_), 'To', 'Cc'];
}
return %res;
......
......@@ -254,9 +254,6 @@ sub loc_fuzzy {
my $self = shift;
return '' if !defined $_[0] || $_[0] eq '';
# XXX: work around perl's deficiency when matching utf8 data
return $_[0] if Encode::is_utf8($_[0]);
return $self->LanguageHandle->maketext_fuzzy( @_ );
}
......
......@@ -386,9 +386,14 @@ sub BuildEmail {
$cid_of{$uri} = time() . $$ . int(rand(1e6));
# downgrade non-text strings, because all strings are utf8 by
# default, which is wrong for non-text strings.
if ( $mimetype !~ m{text/} ) {
# Encode textual data in UTF-8, and downgrade (treat
# codepoints as codepoints, and ensure the UTF-8 flag is
# off) everything else.
my @extra;
if ( $mimetype =~ m{text/} ) {
$data = Encode::encode( "UTF-8", $data );
@extra = ( Charset => "UTF-8" );
} else {
utf8::downgrade( $data, 1 ) or $RT::Logger->warning("downgrade $data failed");
}
......@@ -400,6 +405,7 @@ sub BuildEmail {
Disposition => 'inline',
Name => RT::Interface::Email::EncodeToMIME( String => $filename ),
'Content-Id' => $cid_of{$uri},
@extra,
);
return "cid:$cid_of{$uri}";
......@@ -413,16 +419,16 @@ sub BuildEmail {
);
my $entity = MIME::Entity->build(
From => Encode::encode_utf8($args{From}),
To => Encode::encode_utf8($args{To}),
From => Encode::encode("UTF-8", $args{From}),
To => Encode::encode("UTF-8", $args{To}),
Subject => RT::Interface::Email::EncodeToMIME( String => $args{Subject} ),
Type => "multipart/mixed",
);
$entity->attach(
Data => Encode::encode_utf8($content),
Type => 'text/html',
Charset => 'UTF-8',
Data => Encode::encode("UTF-8", $content),
Disposition => 'inline',
Encoding => "base64",
);
......@@ -558,7 +564,8 @@ sub GetResource {
$HTML::Mason::Commands::r->path_info($path);
# grab the query arguments
my %args = map { $_ => [ $uri->query_param($_) ] } $uri->query_param;
my %args = map { $_ => [ map {Encode::decode("UTF-8",$_)}
$uri->query_param($_) ] } $uri->query_param;
# Convert empty and single element arrayrefs to a non-ref scalar
@$_ < 2 and $_ = $_->[0]
for values %args;
......
......@@ -299,8 +299,8 @@ sub ParseCcAddressesFromHead {
my (@Addresses);
my @ToObjs = Email::Address->parse( $self->Head->get('To') );
my @CcObjs = Email::Address->parse( $self->Head->get('Cc') );
my @ToObjs = Email::Address->parse( Encode::decode( "UTF-8", $self->Head->get('To') ) );
my @CcObjs = Email::Address->parse( Encode::decode( "UTF-8", $self->Head->get('Cc') ) );
foreach my $AddrObj ( @ToObjs, @CcObjs ) {
my $Address = $AddrObj->address;
......
......@@ -62,7 +62,6 @@ use Locale::Maketext 1.04;
use Locale::Maketext::Lexicon 0.25;
use base 'Locale::Maketext::Fuzzy';
use Encode;
use MIME::Entity;
use MIME::Head;
use File::Glob;
......@@ -282,7 +281,7 @@ sub SetMIMEEntityToEncoding {
);
# If this is a textual entity, we'd need to preserve its original encoding
$head->replace( "X-RT-Original-Encoding" => $charset )
$head->replace( "X-RT-Original-Encoding" => Encode::encode( "UTF-8", $charset ) )
if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
return unless IsTextualContentType($head->mime_type);
......@@ -291,13 +290,12 @@ sub SetMIMEEntityToEncoding {
if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) {
my $string = $body->as_string or return;
RT::Util::assert_bytes($string);
$RT::Logger->debug( "Converting '$charset' to '$enc' for "
. $head->mime_type . " - "
. ( $head->get('subject') || 'Subjectless message' ) );
. ( Encode::decode("UTF-8",$head->get('subject')) || 'Subjectless message' ) );
# NOTE:: see the comments at the end of the sub.
Encode::_utf8_off($string);
my $orig_string = $string;
( my $success, $string ) = EncodeFromToWithCroak( $orig_string, $charset => $enc );
if ( !$success ) {
......@@ -328,30 +326,11 @@ sub SetMIMEEntityToEncoding {
}
}
# NOTES: Why Encode::_utf8_off before Encode::from_to
#
# All the strings in RT are utf-8 now. Quotes from Encode POD:
#
# [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
# ... The data in $octets must be encoded as octets and not as
# characters in Perl's internal format. ...
#
# Not turning off the UTF-8 flag in the string will prevent the string
# from conversion.
=head2 DecodeMIMEWordsToUTF8 $raw
An utility method which mimics MIME::Words::decode_mimewords, but only
limited functionality. This function returns an utf-8 string.
It returns the decoded string, or the original string if it's not
encoded. Since the subroutine converts specified string into utf-8
charset, it should not alter a subject written in English.
Why not use MIME::Words directly? Because it fails in RT when I
tried. Maybe it's ok now.
limited functionality. Despite its name, this function returns the
bytes of the string, in UTF-8.
=cut
......@@ -690,13 +669,13 @@ sub SetMIMEHeadToEncoding {
return if $charset eq $enc and $preserve_words;
RT::Util::assert_bytes( $head->as_string );
foreach my $tag ( $head->tags ) {
next unless $tag; # seen in wild: headers with no name
my @values = $head->get_all($tag);
$head->delete($tag);
foreach my $value (@values) {
if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
Encode::_utf8_off($value);
my $orig_value = $value;
( my $success, $value ) = EncodeFromToWithCroak( $orig_value, $charset => $enc );
if ( !$success ) {
......
......@@ -48,7 +48,6 @@
use strict;
use warnings;
use utf8;
package RT::I18N::fr;
use base 'RT::I18N';
......@@ -59,8 +58,8 @@ use warnings;
sub numf {
my ($handle, $num) = @_[0,1];
my $fr_num = $handle->SUPER::numf($num);
# French prefer to print 1000 as 1 000 rather than 1,000
$fr_num =~ tr<.,><, >;
# French prefer to print 1000 as 1(nbsp)000 rather than 1,000
$fr_num =~ tr<.,><,\x{A0}>;
return $fr_num;
}
......
......@@ -110,7 +110,7 @@ sub CheckForLoops {
my $head = shift;
# If this instance of RT sent it our, we don't want to take it in
my $RTLoop = $head->get("X-RT-Loop-Prevention") || "";
my $RTLoop = Encode::decode( "UTF-8", $head->get("X-RT-Loop-Prevention") || "" );
chomp ($RTLoop); # remove that newline
if ( $RTLoop eq RT->Config->Get('rtname') ) {
return 1;
......@@ -248,22 +248,27 @@ sub MailError {
# the colons are necessary to make ->build include non-standard headers
my %entity_args = (
Type => "multipart/mixed",
From => $args{'From'},
Bcc => $args{'Bcc'},
To => $args{'To'},
Subject => $args{'Subject'},
'X-RT-Loop-Prevention:' => RT->Config->Get('rtname'),
From => Encode::encode( "UTF-8", $args{'From'} ),
Bcc => Encode::encode( "UTF-8", $args{'Bcc'} ),
To => Encode::encode( "UTF-8", $args{'To'} ),
Subject => EncodeToMIME( String => $args{'Subject'} ),
'X-RT-Loop-Prevention:' => Encode::encode( "UTF-8", RT->Config->Get('rtname') ),
);
# only set precedence if the sysadmin wants us to
if (defined(RT->Config->Get('DefaultErrorMailPrecedence'))) {
$entity_args{'Precedence:'} = RT->Config->Get('DefaultErrorMailPrecedence');
$entity_args{'Precedence:'} =
Encode::encode( "UTF-8", RT->Config->Get('DefaultErrorMailPrecedence') );
}
my $entity = MIME::Entity->build(%entity_args);
SetInReplyTo( Message => $entity, InReplyTo => $args{'MIMEObj'} );
$entity->attach( Data => $args{'Explanation'} . "\n" );
$entity->attach(
Type => "text/plain",
Charset => "UTF-8",
Data => Encode::encode( "UTF-8", $args{'Explanation'} . "\n" ),
);
if ( $args{'MIMEObj'} ) {
$args{'MIMEObj'}->sync_headers;
......@@ -271,7 +276,7 @@ sub MailError {
}
if ( $args{'Attach'} ) {
$entity->attach( Data => $args{'Attach'}, Type => 'message/rfc822' );
$entity->attach( Data => Encode::encode( "UTF-8", $args{'Attach'} ), Type => 'message/rfc822' );
}
......@@ -362,7 +367,7 @@ sub SendEmail {
return 0;
}
my $msgid = $args{'Entity'}->head->get('Message-ID') || '';
my $msgid = Encode::decode( "UTF-8", $args{'Entity'}->head->get('Message-ID') || '' );
chomp $msgid;
# If we don't have any recipients to send to, don't send a message;
......@@ -382,7 +387,7 @@ sub SendEmail {
if (my $precedence = RT->Config->Get('DefaultMailPrecedence')
and !$args{'Entity'}->head->get("Precedence")
) {
$args{'Entity'}->head->replace( 'Precedence', $precedence );
$args{'Entity'}->head->replace( 'Precedence', Encode::encode("UTF-8",$precedence) );