Subject: =?UTF-8?B?W0pJUkFdIChTUE1TLTIx?= =?UTF-8?B?MjUpIOe8lui+keaIkOaenO+8jOS6uuW3peefqemYtemHjeWkjeWQjg==?= =?UTF-8?B?77yM5Y+W5raI5oiQ5p6c77yM6YCA5Ye6?= =?UTF-8?B?6aG555uu77yM5YaN5qyh57yW6L6R6aG5?= =?UTF-8?B?55uu5LiA55u05o+Q56S65Lq65bel5YiG?= =?UTF-8?B?6Kej55qE6YOo5omA5LiN5Y+v6YeN5aSN?=
这是一封邮件的头部主题,解析的这种类型的时候出错了。请问有碰到类似情况的同学吗?下面这种一行的用下面的php解析没有问题,多行的就会有问题。我手动把多行的连接成一行解析,仍然有乱码。请问有好的解析方法吗,什么编程语言都不限制了。
Subject: =?UTF-8?B?5rWL6K+V5rWL6K+V?=
function decode_mime_string($string)
{
$en = mb_detect_encoding($string, "JIS, ASCII, EUC-CN, UTF8");
$string = mb_convert_encoding($string, "utf-8", $en);
$string = str_replace(array('\r\r\n','\r\n','\r','\n'),'',$string);
preg_match_all( '/\=\?(.*?)\?([QqBb])\?(.*?)\?\=/is', $string, $arr );
if(!isset($arr[3][0])){
return $string;
} else {
$arr2 = explode(',', $string);
$len = count($arr2);
if ($len != count($arr[0])) {
unset($arr[0], $arr[3]);
foreach($arr2 as $key => $val){
preg_match_all( '/\=\?(.*?)\?([QqBb])\?(.*?)\?\=/is', $val, $arr3 );
$arr[0][$key] = implode(' ', $arr3[0]);
$arr[3][$key] = implode(' ', $arr3[3]);
}
}
$chararr = $arr[1];
$encode = $arr[2];
$enstr = $arr[3];
foreach( $enstr as $key=>$val ){
$encoding = $encode[$key];
$encoded_text = $val;
$charset = $chararr[$key];
switch ($encoding)
{
case 'Q':
case 'q':
$encoded_text = str_replace('_', '%20', $encoded_text);
$encoded_text = str_replace('=', '%', $encoded_text);
$decoded = urldecode($encoded_text);
break;
case 'B':
case 'b':
$decoded = urldecode(base64_decode($encoded_text));
break;
default:
$decoded = '=?' . $charset . '?' . $encoding . '?' . $encoded_text . '?=';
break;
}
$charset = strtolower($charset);
if ( $charset != 'utf8' && $charset != 'utf-8' )
$decoded = mb_convert_encoding($decoded, "utf-8","gbk,gb2312");
$string = str_replace($arr[0][$key],$decoded,$string);
}
return $string;
}
}
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。
结帖
http://stackoverflow.com/questions/8626786/proper-way-to-decode-incoming-email-subject-utf-8