1
0
mirror of https://gitee.com/ledc/IYUUAutoReseed synced 2025-05-03 15:53:27 +00:00
2020-01-14 19:17:29 +08:00

999 lines
31 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// +----------------------------------------------------------------------
// | PHPSpider [ A PHP Framework For Crawler ]
// +----------------------------------------------------------------------
// | Copyright (c) 2006-2014 https://doc.phpspider.org All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: Seatle Yang <seatle@foxmail.com>
// +----------------------------------------------------------------------
// +----------------------------------------------------------------------
// | GET请求
// | requests::get('http://www.test.com');
// | SERVER
// | $_GET
// +----------------------------------------------------------------------
// | POST请求
// | $data = array('name'=>'request');
// | requests::post('http://www.test.com', $data);
// | SERVER
// | $_POST
// +----------------------------------------------------------------------
// | POST RESTful请求
// | $data = array('name'=>'request');
// | $data_string = json_encode($data);
// | requests::set_header("Content-Type", "application/json");
// | requests::post('http://www.test.com', $data_string);
// | SERVER
// | file_get_contents('php://input')
// +----------------------------------------------------------------------
// | POST 文件上传
// | $data = array('file1'=>''./data/phpspider.log'');
// | requests::post('http://www.test.com', null, $data);
// | SERVER
// | $_FILES
// +----------------------------------------------------------------------
// | 代理
// | requests::set_proxy(array('223.153.69.150:42354'));
// | $html = requests::get('https://www.test.com');
// +----------------------------------------------------------------------
//----------------------------------
// PHPSpider请求类文件
//----------------------------------
namespace phpspider\core;
if (!function_exists('curl_file_create'))
{
function curl_file_create($filename, $mimetype = '', $postname = '')
{
return "@$filename;filename="
. ($postname ?: basename($filename))
. ($mimetype ? ";type=$mimetype" : '');
}
}
class requests
{
const VERSION = '2.0.1';
protected static $ch = null;
/**** Public variables ****/
/* user definable vars */
public static $timeout = 15;
public static $encoding = null;
public static $input_encoding = null;
public static $output_encoding = null;
public static $cookies = array(); // array of cookies to pass
// $cookies['username'] = "seatle";
public static $rawheaders = array(); // array of raw headers to send
public static $domain_cookies = array(); // array of cookies for domain to pass
public static $hosts = array(); // random host binding for make request faster
public static $headers = array(); // headers returned from server sent here
public static $useragents = array("requests/2.0.0"); // random agent we masquerade as
public static $client_ips = array(); // random ip we masquerade as
public static $proxies = array(); // random proxy ip
public static $raw = ""; // head + body content returned from server sent here
public static $head = ""; // head content
public static $content = ""; // The body before encoding
public static $text = ""; // The body after encoding
public static $info = array(); // curl info
public static $history = 302; // http request status before redirect. ex:30x
public static $status_code = 0; // http request status
public static $error = ""; // error messages sent here
/**
* set timeout
* $timeout 为数组时会分别设置connect和read
*
* @param init or array $timeout
* @return
*/
public static function set_timeout($timeout)
{
self::$timeout = $timeout;
}
/**
* 设置代理
* 如果代理有多个,请求时会随机使用
*
* @param mixed $proxies
* array (
* 'socks5://user1:pass2@host:port',
* 'socks5://user2:pass2@host:port'
*)
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-09-18 10:17
*/
public static function set_proxy($proxy)
{
self::$proxies = is_array($proxy) ? $proxy : array($proxy);
}
/**
* 删除代理
* 因为每个链接信息里面都有代理信息,有的链接需要,有的不需要,所以必须提供一个删除功能
*
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2018-07-16 17:59
*/
public static function del_proxy()
{
self::$proxies = array();
}
/**
* 自定义请求头部
* 请求头内容可以用 requests::$rawheaders 来获取
* 比如获取Content-Typerequests::$rawheaders['Content-Type']
*
* @param string $headers
* @return void
*/
public static function set_header($key, $value)
{
self::$rawheaders[$key] = $value;
}
/**
* 设置全局COOKIE
*
* @param string $cookie
* @return void
*/
public static function set_cookie($key, $value, $domain = '')
{
if (empty($key))
{
return false;
}
if (!empty($domain))
{
self::$domain_cookies[$domain][$key] = $value;
}
else
{
self::$cookies[$key] = $value;
}
return true;
}
/**
* 批量设置全局cookie
*
* @param mixed $cookies
* @param string $domain
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function set_cookies($cookies, $domain = '')
{
$cookies_arr = explode(';', $cookies);
if (empty($cookies_arr))
{
return false;
}
foreach ($cookies_arr as $cookie)
{
$cookie_arr = explode('=', $cookie, 2);
$key = $cookie_arr[0];
$value = empty($cookie_arr[1]) ? '' : $cookie_arr[1];
if (!empty($domain))
{
self::$domain_cookies[$domain][$key] = $value;
}
else
{
self::$cookies[$key] = $value;
}
}
return true;
}
/**
* 获取单一Cookie
*
* @param mixed $name cookie名称
* @param string $domain 不传则取全局cookie就是手动set_cookie的cookie
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function get_cookie($name, $domain = '')
{
if (!empty($domain) && !isset(self::$domain_cookies[$domain]))
{
return '';
}
$cookies = empty($domain) ? self::$cookies : self::$domain_cookies[$domain];
return isset($cookies[$name]) ? $cookies[$name] : '';
}
/**
* 获取Cookie数组
*
* @param string $domain 不传则取全局cookie就是手动set_cookie的cookie
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function get_cookies($domain = '')
{
if (!empty($domain) && !isset(self::$domain_cookies[$domain]))
{
return array();
}
return empty($domain) ? self::$cookies : self::$domain_cookies[$domain];
}
/**
* 删除Cookie
*
* @param string $domain 不传则删除全局Cookie
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function del_cookie($key, $domain = '')
{
if (empty($key))
{
return false;
}
if (!empty($domain) && !isset(self::$domain_cookies[$domain]))
{
return false;
}
if (!empty($domain))
{
if (isset(self::$domain_cookies[$domain][$key]))
{
unset(self::$domain_cookies[$domain][$key]);
}
}
else
{
if (isset(self::$cookies[$key]))
{
unset(self::$cookies[$key]);
}
}
return true;
}
/**
* 删除Cookie
*
* @param string $domain 不传则删除全局Cookie
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function del_cookies($domain = '')
{
if (!empty($domain) && !isset(self::$domain_cookies[$domain]))
{
return false;
}
if ( empty($domain) )
{
self::$cookies = array();
}
else
{
if (isset(self::$domain_cookies[$domain]))
{
unset(self::$domain_cookies[$domain]);
}
}
return true;
}
/**
* 设置随机的user_agent
*
* @param string $useragent
* @return void
*/
public static function set_useragent($useragent)
{
self::$useragents = is_array($useragent) ? $useragent : array($useragent);
}
/**
* set referer
*
*/
public static function set_referer($referer)
{
self::$rawheaders['Referer'] = $referer;
}
/**
* 设置伪造IP
* 传入数组则为随机IP
* @param string $ip
* @return void
*/
public static function set_client_ip($ip)
{
self::$client_ips = is_array($ip) ? $ip : array($ip);
}
/**
* 删除伪造IP
*
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2018-07-16 17:59
*/
public static function del_client_ip()
{
self::$client_ips = array();
}
/**
* 设置中文请求
*
* @param string $lang
* @return void
*/
public static function set_accept_language($lang = 'zh-CN')
{
self::$rawheaders['Accept-Language'] = $lang;
}
/**
* 设置Hosts
* 负载均衡到不同的服务器如果对方使用CDN采用这个是最好的了
*
* @param string $hosts
* @return void
*/
public static function set_hosts($host, $ips = array())
{
$ips = is_array($ips) ? $ips : array($ips);
self::$hosts[$host] = $ips;
}
/**
* 分割返回的header和body
* header用来判断编码和获取Cookie
* body用来判断编码得到编码前和编码后的内容
*
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function split_header_body()
{
$head = $body = '';
$head = substr(self::$raw, 0, self::$info['header_size']);
$body = substr(self::$raw, self::$info['header_size']);
// http header
self::$head = $head;
// The body before encoding
self::$content = $body;
//$http_headers = array();
//// 解析HTTP数据流
//if (!empty(self::$raw))
//{
//self::get_response_cookies($domain);
//// body里面可能有 \r\n\r\n但是第一个一定是HTTP Header去掉后剩下的就是body
//$array = explode("\r\n\r\n", self::$raw);
//foreach ($array as $k=>$v)
//{
//// post 方法会有两个http headerHTTP/1.1 100 Continue、HTTP/1.1 200 OK
//if (preg_match("#^HTTP/.*? 100 Continue#", $v))
//{
//unset($array[$k]);
//continue;
//}
//if (preg_match("#^HTTP/.*? \d+ #", $v))
//{
//$header = $v;
//unset($array[$k]);
//$http_headers = self::get_response_headers($v);
//}
//}
//$body = implode("\r\n\r\n", $array);
//}
// 设置了输出编码的转码,注意: xpath只支持utf-8iso-8859-1 不要转他本身就是utf-8
$body = self::encoding($body); //自动转码
// 转码后
self::$encoding = self::$output_encoding;
// The body after encoding
self::$text = $body;
return array($head, $body);
}
/**
* 获得域名相对应的Cookie
*
* @param mixed $header
* @param mixed $domain
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function get_response_cookies($header, $domain)
{
// 解析Cookie并存入 self::$cookies 方便调用
preg_match_all("/.*?Set\-Cookie: ([^\r\n]*)/i", $header, $matches);
$cookies = empty($matches[1]) ? array() : $matches[1];
// 解析到Cookie
if (!empty($cookies))
{
$cookies = implode(';', $cookies);
$cookies = explode(';', $cookies);
foreach ($cookies as $cookie)
{
$cookie_arr = explode('=', $cookie, 2);
// 过滤 httponly、secure
if (count($cookie_arr) < 2)
{
continue;
}
$cookie_name = !empty($cookie_arr[0]) ? trim($cookie_arr[0]) : '';
if (empty($cookie_name))
{
continue;
}
// 过滤掉domain路径
if (in_array(strtolower($cookie_name), array('path', 'domain', 'expires', 'max-age')))
{
continue;
}
self::$domain_cookies[$domain][trim($cookie_arr[0])] = trim($cookie_arr[1]);
}
}
}
/**
* 获得response header
* 此方法占时没有用到
*
* @param mixed $header
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function get_response_headers($header)
{
$headers = array();
$header_lines = explode("\n", $header);
if (!empty($header_lines))
{
foreach ($header_lines as $line)
{
$header_arr = explode(':', $line, 2);
$key = empty($header_arr[0]) ? '' : trim($header_arr[0]);
$val = empty($header_arr[1]) ? '' : trim($header_arr[1]);
if (empty($key) || empty($val))
{
continue;
}
$headers[$key] = $val;
}
}
self::$headers = $headers;
return self::$headers;
}
/**
* 获取编码
* @param $string
* @return string
*/
public static function get_encoding($string)
{
$encoding = mb_detect_encoding($string, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1'));
return strtolower($encoding);
}
/**
* 移除页面head区域代码
* @param $html
* @return mixed
*/
private static function _remove_head($html)
{
return preg_replace('/<head.+?>.+<\/head>/is', '<head></head>', $html);
}
/**
* 简单的判断一下参数是否为一个URL链接
* @param string $str
* @return boolean
*/
private static function _is_url($url)
{
//$pattern = '/^http(s)?:\\/\\/.+/';
$pattern = "/\b(([\w-]+:\/\/?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|\/)))/";
if (preg_match($pattern, $url))
{
return true;
}
return false;
}
/**
* 初始化 CURL
*
*/
public static function init()
{
if (!is_resource ( self::$ch ))
{
self::$ch = curl_init ();
curl_setopt( self::$ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( self::$ch, CURLOPT_HEADER, false );
curl_setopt( self::$ch, CURLOPT_USERAGENT, "phpspider-requests/".self::VERSION );
// 如果设置了两个时间,就分开设置
if (is_array(self::$timeout))
{
curl_setopt( self::$ch, CURLOPT_CONNECTTIMEOUT, self::$timeout[0] );
curl_setopt( self::$ch, CURLOPT_TIMEOUT, self::$timeout[1]);
}
else
{
curl_setopt(self::$ch, CURLOPT_CONNECTTIMEOUT, ceil(self::$timeout / 2));
curl_setopt(self::$ch, CURLOPT_TIMEOUT, self::$timeout);
}
curl_setopt(self::$ch, CURLOPT_MAXREDIRS, 5); //maximum number of redirects allowed
// 在多线程处理场景下使用超时选项时会忽略signals对应的处理函数但是无耐的是还有小概率的crash情况发生
curl_setopt( self::$ch, CURLOPT_NOSIGNAL, true);
}
return self::$ch;
}
/**
* get 请求
*/
public static function get($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
return self::request($url, 'get', $fields, NULL, $allow_redirects, $cert);
}
/**
* post 请求
* $fields 有三种类型:1、数组2、http query3、json
* 1、array('name'=>'yangzetao')
* 2、http_build_query(array('name'=>'yangzetao'))
* 3、json_encode(array('name'=>'yangzetao'))
* 前两种是普通的post可以用$_POST方式获取
* 第三种是post stream( json rpc其实就是webservice )
* 虽然是post方式但是只能用流方式 http://input 后者 $HTTP_RAW_POST_DATA 获取
*
* @param mixed $url
* @param array $fields
* @param mixed $proxies
* @static
* @access public
* @return void
*/
public static function post($url, $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
return self::request($url, 'POST', $fields, $files, $allow_redirects, $cert);
}
public static function put($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
return self::request($url, 'PUT', $fields, $allow_redirects, $cert);
}
public static function delete($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
return self::request($url, 'DELETE', $fields, $allow_redirects, $cert);
}
// 响应HTTP头域里的元信息
// 此方法被用来获取请求实体的元信息而不需要传输实体主体entity-body
// 此方法经常被用来测试超文本链接的有效性,可访问性,和最近的改变。.
public static function head($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
self::request($url, 'HEAD', $fields, $allow_redirects, $cert);
}
public static function options($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
return self::request($url, 'OPTIONS', $fields, $allow_redirects, $cert);
}
public static function patch($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
self::init ();
return self::request($url, 'PATCH', $fields, $allow_redirects, $cert);
}
/**
* request
*
* @param mixed $url 请求URL
* @param string $method 请求方法
* @param array $fields 表单字段
* @param array $files 上传文件
* @param mixed $cert CA证书
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function request($url, $method = 'GET', $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL)
{
$method = strtoupper($method);
if(!self::_is_url($url))
{
self::$error = "You have requested URL ({$url}) is not a valid HTTP address";
return false;
}
// 如果是 get 方式,直接拼凑一个 url 出来
if ($method == 'GET' && !empty($fields))
{
$url = $url.(strpos($url, '?') === false ? '?' : '&').http_build_query($fields);
}
$parse_url = parse_url($url);
if (empty($parse_url) || empty($parse_url['host']) || !in_array($parse_url['scheme'], array('http', 'https')))
{
self::$error = "No connection adapters were found for '{$url}'";
return false;
}
$scheme = $parse_url['scheme'];
$domain = $parse_url['host'];
// 随机绑定 hosts做负载均衡
if (self::$hosts)
{
if (isset(self::$hosts[$domain]))
{
$hosts = self::$hosts[$domain];
$key = rand(0, count($hosts)-1);
$ip = $hosts[$key];
$url = str_replace($domain, $ip, $url);
self::$rawheaders['Host'] = $domain;
}
}
curl_setopt( self::$ch, CURLOPT_URL, $url );
if ($method != 'GET')
{
// 如果是 post 方式
if ($method == 'POST')
{
//curl_setopt( self::$ch, CURLOPT_POST, true );
$tmpheaders = array_change_key_case(self::$rawheaders, CASE_LOWER);
// 有些RESTful服务只接受JSON形态的数据
// CURLOPT_POST会把上傳的文件类型设为 multipart/form-data
// 把CURLOPT_POSTFIELDS的内容按multipart/form-data 的形式编码
// CURLOPT_CUSTOMREQUEST可以按指定内容上传
if ( isset($tmpheaders['content-type']) && $tmpheaders['content-type'] == 'application/json' )
{
curl_setopt( self::$ch, CURLOPT_CUSTOMREQUEST, $method );
}
else
{
curl_setopt( self::$ch, CURLOPT_POST, true );
}
$file_fields = array();
if (!empty($files))
{
foreach ($files as $postname => $file)
{
$filepath = realpath($file);
// 如果文件不存在
if (!file_exists($filepath))
{
continue;
}
$filename = basename($filepath);
$type = self::get_mimetype($filepath);
$file_fields[$postname] = curl_file_create($filepath, $type, $filename);
// curl -F "name=seatle&file=@/absolute/path/to/image.png" htt://localhost/uploadfile.php
//$cfile = '@'.realpath($filename).";type=".$type.";filename=".$filename;
}
}
}
else
{
self::$rawheaders['X-HTTP-Method-Override'] = $method;
curl_setopt( self::$ch, CURLOPT_CUSTOMREQUEST, $method );
}
if ( $method == 'POST' )
{
// 不是上传文件的用http_build_query, 能实现更好的兼容性,更小的请求数据包
if ( empty($file_fields) )
{
// post方式
if ( is_array($fields) )
{
$fields = http_build_query($fields);
}
}
else
{
// 有post数据
if ( is_array($fields) && !empty($fields) )
{
// 某些server可能会有问题
$fields = array_merge($fields, $file_fields);
}
else
{
$fields = $file_fields;
}
}
// 不能直接传数组不知道是什么Bug会非常慢
curl_setopt( self::$ch, CURLOPT_POSTFIELDS, $fields );
}
}
$cookies = self::get_cookies();
$domain_cookies = self::get_cookies($domain);
$cookies = array_merge($cookies, $domain_cookies);
// 是否设置了cookie
if (!empty($cookies))
{
foreach ($cookies as $key=>$value)
{
$cookie_arr[] = $key.'='.$value;
}
$cookies = implode('; ', $cookie_arr);
curl_setopt(self::$ch, CURLOPT_COOKIE, $cookies);
}
if (!empty(self::$useragents))
{
$key = rand(0, count(self::$useragents) - 1);
self::$rawheaders['User-Agent'] = self::$useragents[$key];
}
if (!empty(self::$client_ips))
{
$key = rand(0, count(self::$client_ips) - 1);
self::$rawheaders['CLIENT-IP'] = self::$client_ips[$key];
self::$rawheaders['X-FORWARDED-FOR'] = self::$client_ips[$key];
}
if (self::$rawheaders)
{
$http_headers = array();
foreach (self::$rawheaders as $k=>$v)
{
$http_headers[] = $k.': '.$v;
}
curl_setopt( self::$ch, CURLOPT_HTTPHEADER, $http_headers );
}
curl_setopt( self::$ch, CURLOPT_ENCODING, 'gzip' );
// 关闭验证
if ($scheme == 'https')
{
curl_setopt(self::$ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt(self::$ch, CURLOPT_SSL_VERIFYHOST, false);
}
if (self::$proxies)
{
$key = rand(0, count(self::$proxies) - 1);
$proxy = self::$proxies[$key];
curl_setopt( self::$ch, CURLOPT_PROXY, $proxy );
}
// header + bodyheader 里面有 cookie
curl_setopt( self::$ch, CURLOPT_HEADER, true );
// 请求跳转后的内容
if ($allow_redirects)
{
curl_setopt( self::$ch, CURLOPT_FOLLOWLOCATION, true);
}
self::$raw = curl_exec ( self::$ch );
// 真实url
//$location = curl_getinfo( self::$ch, CURLINFO_EFFECTIVE_URL);
self::$info = curl_getinfo( self::$ch );
//print_r(self::$info);
self::$status_code = self::$info['http_code'];
if (self::$raw === false)
{
self::$error = 'Curl error: ' . curl_error( self::$ch );
//trigger_error(self::$error, E_USER_WARNING);
}
// 关闭句柄
curl_close( self::$ch );
// 请求成功之后才把URL存起来
list($header, $text) = self::split_header_body();
self::$history = self::get_history($header);
self::$headers = self::get_response_headers($header);
self::get_response_cookies($header, $domain);
//$data = substr($data, 10);
//$data = gzinflate($data);
return $text;
}
public static function get_history($header)
{
$status_code = 0;
$lines = explode("\n", $header);
foreach ($lines as $line)
{
$line = trim($line);
if (preg_match("#^HTTP/.*? (\d+) Found#", $line, $out))
{
$status_code = empty($out[1]) ? 0 : intval($out[1]);
}
}
return $status_code;
}
// 获取 mimetype
public static function get_mimetype($filepath)
{
$fp = finfo_open(FILEINFO_MIME);
$mime = finfo_file($fp, $filepath);
finfo_close($fp);
$arr = explode(';', $mime);
$type = empty($arr[0]) ? '' : $arr[0];
return $type;
}
/**
* 拼凑文件和表单
* 占时没有用到
*
* @param mixed $post_fields
* @param mixed $file_fields
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2017-08-03 18:06
*/
public static function get_postfile_form($post_fields, $file_fields)
{
// 构造post数据
$data = '';
$delimiter = '-------------' . uniqid();
// 表单数据
foreach ($post_fields as $name => $content)
{
$data .= '--'.$delimiter."\r\n";
$data .= 'Content-Disposition: form-data; name = "'.$name.'"';
$data .= "\r\n\r\n";
$data .= $content;
$data .= "\r\n";
}
foreach ($file_fields as $input_name => $file)
{
$data .= '--'.$delimiter."\r\n";
$data .= 'Content-Disposition: form-data; name = "'.$input_name.'";'.
' filename="'.$file['filename'].'"'."\r\n";
$data .= "Content-Type: {$file['type']}\r\n";
$data .= "\r\n";
$data .= $file['content'];
$data .= "\r\n";
}
// 结束符
$data .= '--'.$delimiter."--\r\n";
//return array(
//CURLOPT_HTTPHEADER => array(
//'Content-Type:multipart/form-data;boundary=' . $delimiter,
//'Content-Length:' . strlen($data)
//),
//CURLOPT_POST => true,
//CURLOPT_POSTFIELDS => $data,
//);
return array($delimiter, $data);
}
/**
* html encoding transform
*
* @param string $html
* @param string $in
* @param string $out
* @param string $content
* @param string $mode
* auto|iconv|mb_convert_encoding
* @return string
*/
public static function encoding($html, $in = null, $out = null, $mode = 'auto')
{
$valid = array(
'auto',
'iconv',
'mb_convert_encoding',
);
if (isset(self::$output_encoding))
{
$out = self::$output_encoding;
}
if ( ! isset($out))
{
$out = 'UTF-8';
}
if ( ! in_array($mode, $valid))
{
throw new Exception('invalid mode, mode='.$mode);
}
$if = function_exists('mb_convert_encoding');
$if = $if && ($mode == 'auto' || $mode == 'mb_convert_encoding');
if (function_exists('iconv') && ($mode == 'auto' || $mode == 'iconv'))
{
$func = 'iconv';
}
elseif ($if)
{
$func = 'mb_convert_encoding';
}
else
{
throw new Exception('charsetTrans failed, no function');
}
$pattern = '/(<meta[^>]*?charset=([\"\']?))([a-z\d_\-]*)(\2[^>]*?>)/is';
if ( ! isset($in))
{
$n = preg_match($pattern, $html, $in);
if ($n > 0)
{
$in = $in[3];
}
else
{
$in = null;
}
if (empty($in) and function_exists('mb_detect_encoding'))
{
$in = mb_detect_encoding($html, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1'));
}
}
if (isset($in))
{
if ($in == 'ISO-8859-1')
{
$in = 'UTF-8';
}
$old = error_reporting(error_reporting() & ~E_NOTICE);
$html = call_user_func($func, $in, $out.'//IGNORE', $html);
error_reporting($old);
$html = preg_replace($pattern, "\\1$out\\4", $html, 1);
}
return $html;
}
}