You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
998 lines
31 KiB
998 lines
31 KiB
<?php
|
|
// +----------------------------------------------------------------------
|
|
// | PHPSpider [ A PHP Framework For Crawler ]
|
|
// +----------------------------------------------------------------------
|
|
// | Copyright (c) 2006-2014 https://doc.phpspider.org All rights reserved.
|
|
// +----------------------------------------------------------------------
|
|
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
|
|
// +----------------------------------------------------------------------
|
|
// | Author: Seatle Yang <seatle@foxmail.com>
|
|
// +----------------------------------------------------------------------
|
|
|
|
// +----------------------------------------------------------------------
|
|
// | GET请求
|
|
// | requests::get('http://www.test.com');
|
|
// | SERVER
|
|
// | $_GET
|
|
// +----------------------------------------------------------------------
|
|
// | POST请求
|
|
// | $data = array('name'=>'request');
|
|
// | requests::post('http://www.test.com', $data);
|
|
// | SERVER
|
|
// | $_POST
|
|
// +----------------------------------------------------------------------
|
|
// | POST RESTful请求
|
|
// | $data = array('name'=>'request');
|
|
// | $data_string = json_encode($data);
|
|
// | requests::set_header("Content-Type", "application/json");
|
|
// | requests::post('http://www.test.com', $data_string);
|
|
// | SERVER
|
|
// | file_get_contents('php://input')
|
|
// +----------------------------------------------------------------------
|
|
// | POST 文件上传
|
|
// | $data = array('file1'=>'./data/phpspider.log');
|
|
// | requests::post('http://www.test.com', null, $data);
|
|
// | SERVER
|
|
// | $_FILES
|
|
// +----------------------------------------------------------------------
|
|
// | 代理
|
|
// | requests::set_proxy(array('223.153.69.150:42354'));
|
|
// | $html = requests::get('https://www.test.com');
|
|
// +----------------------------------------------------------------------
|
|
|
|
//----------------------------------
|
|
// PHPSpider请求类文件
|
|
//----------------------------------
|
|
|
|
namespace phpspider\core;
|
|
|
|
// Fallback for runtimes without curl_file_create(): builds the legacy
// "@path;filename=name;type=mime" upload string instead of a CURLFile object.
if (function_exists('curl_file_create') === false) {
    function curl_file_create($filename, $mimetype = '', $postname = '')
    {
        // The posted name defaults to the base name of the local file.
        $upload_name = $postname ? $postname : basename($filename);
        // The ";type=" suffix is only appended when a MIME type was given.
        $type_suffix = $mimetype ? ';type=' . $mimetype : '';

        return '@' . $filename . ';filename=' . $upload_name . $type_suffix;
    }
}
|
|
|
|
class requests
|
|
{
|
|
// Library version; init() appends it to the default cURL user agent.
const VERSION = '2.0.1';

// cURL handle created by init() and used by request().
protected static $ch = null;

/**** Public variables ****/

/* User-definable settings */

// Timeout in seconds; an array is read by init() as array(connect, total).
public static $timeout = 15;
// Overwritten with $output_encoding by split_header_body() after each response.
public static $encoding = null;
// NOTE(review): not read anywhere in the visible code - confirm intended use.
public static $input_encoding = null;
// Target charset for encoding(); when null, encoding() falls back to UTF-8.
public static $output_encoding = null;
// Global cookies sent with every request, e.g. $cookies['username'] = "seatle";
public static $cookies = array();
// Raw request headers to send, keyed by header name.
public static $rawheaders = array();
// Cookies keyed by domain, then by cookie name.
public static $domain_cookies = array();
// Host => list of IPs; request() picks one at random for that host.
public static $hosts = array();
// Response headers of the last request.
public static $headers = array();
// Pool of user agents; request() picks one at random.
public static $useragents = array("requests/2.0.0");
// Pool of client IPs sent as CLIENT-IP / X-FORWARDED-FOR; picked at random.
public static $client_ips = array();
// Pool of proxies; request() picks one at random.
public static $proxies = array();
// Head + body exactly as returned by curl_exec().
public static $raw = "";
// Response head.
public static $head = "";
// Response body before charset conversion.
public static $content = "";
// Response body after charset conversion.
public static $text = "";
// Result of curl_getinfo() for the last request.
public static $info = array();
// HTTP status seen before a redirect (30x), as computed by get_history().
public static $history = 302;
// HTTP status of the last request.
public static $status_code = 0;
// Last error message.
public static $error = "";
|
|
|
|
/**
 * Store the timeout used when init() creates the cURL handle.
 *
 * A scalar is used as the total timeout, with half of it (rounded up) as the
 * connect timeout. An array is read as array(connect timeout, total timeout).
 *
 * @param int|array $timeout seconds, or array(connect, total)
 * @return void
 */
public static function set_timeout($timeout)
{
    self::$timeout = $timeout;
}
|
|
|
|
/**
 * Replace the proxy pool.
 *
 * When the pool holds several proxies, request() picks one at random for
 * every call. Example:
 *   array(
 *       'socks5://user1:pass1@host:port',
 *       'socks5://user2:pass2@host:port',
 *   )
 *
 * @param string|array $proxy a single proxy or a list of proxies
 * @return void
 */
public static function set_proxy($proxy)
{
    if (!is_array($proxy)) {
        // A single proxy is wrapped so the pool is always an array.
        $proxy = array($proxy);
    }

    self::$proxies = $proxy;
}
|
|
|
|
/**
 * Empty the proxy pool.
 *
 * The pool is shared by all requests, so callers that need a proxy for some
 * URLs only must be able to clear it again.
 *
 * @return void
 */
public static function del_proxy()
{
    self::$proxies = array();
}
|
|
|
|
/**
 * Set one custom request header.
 *
 * Headers are kept in requests::$rawheaders keyed by name, so a value can be
 * read back with e.g. requests::$rawheaders['Content-Type'].
 *
 * @param string $key   header name
 * @param string $value header value
 * @return void
 */
public static function set_header($key, $value)
{
    self::$rawheaders[$key] = $value;
}
|
|
|
|
/**
 * Store a single cookie.
 *
 * @param string $key    cookie name; an empty name is rejected
 * @param string $value  cookie value
 * @param string $domain when given, the cookie is stored for that domain
 *                       only; otherwise it goes into the global jar
 * @return bool false when $key is empty, true otherwise
 */
public static function set_cookie($key, $value, $domain = '')
{
    if (empty($key)) {
        return false;
    }

    if (empty($domain)) {
        self::$cookies[$key] = $value;
        return true;
    }

    self::$domain_cookies[$domain][$key] = $value;
    return true;
}
|
|
|
|
/**
 * Store several cookies given as a Cookie-header style string.
 *
 * The string is split on ';' and each piece on the first '='. Names and
 * values are trimmed, so "a=1; b=2" yields the names "a" and "b" rather than
 * "a" and " b". Pieces without a name (for example after a trailing ';') are
 * skipped, and a piece without '=' is stored with an empty value.
 *
 * @param string $cookies e.g. "name1=value1; name2=value2"
 * @param string $domain  when given, cookies are stored for that domain
 *                        only; otherwise they go into the global jar
 * @return bool false when $cookies is empty or only whitespace, true otherwise
 */
public static function set_cookies($cookies, $domain = '')
{
    $cookies = trim((string) $cookies);
    if ($cookies === '') {
        return false;
    }

    foreach (explode(';', $cookies) as $cookie) {
        $cookie_arr = explode('=', $cookie, 2);
        $key = trim($cookie_arr[0]);
        if ($key === '') {
            continue;
        }
        // isset() instead of empty() so a literal "0" value is preserved.
        $value = isset($cookie_arr[1]) ? trim($cookie_arr[1]) : '';

        if (!empty($domain)) {
            self::$domain_cookies[$domain][$key] = $value;
        } else {
            self::$cookies[$key] = $value;
        }
    }

    return true;
}
|
|
|
|
/**
 * Read one cookie value.
 *
 * @param string $name   cookie name
 * @param string $domain when empty, the global jar (cookies set manually
 *                       without a domain) is used; otherwise that domain's jar
 * @return mixed the stored value, or '' when the cookie or domain is unknown
 */
public static function get_cookie($name, $domain = '')
{
    if (empty($domain)) {
        $jar = self::$cookies;
    } elseif (isset(self::$domain_cookies[$domain])) {
        $jar = self::$domain_cookies[$domain];
    } else {
        return '';
    }

    if (isset($jar[$name])) {
        return $jar[$name];
    }

    return '';
}
|
|
|
|
/**
 * Read a whole cookie jar.
 *
 * @param string $domain when empty, the global jar is returned; otherwise the
 *                       jar of that domain
 * @return array name => value pairs; empty when the domain has no jar
 */
public static function get_cookies($domain = '')
{
    if (empty($domain)) {
        return self::$cookies;
    }

    if (isset(self::$domain_cookies[$domain])) {
        return self::$domain_cookies[$domain];
    }

    return array();
}
|
|
|
|
/**
 * Remove one cookie.
 *
 * @param string $key    cookie name; an empty name is rejected
 * @param string $domain when empty, the cookie is removed from the global
 *                       jar; otherwise from that domain's jar
 * @return bool false when $key is empty or the domain has no jar; true
 *              otherwise, even if the cookie was not present
 */
public static function del_cookie($key, $domain = '')
{
    if (empty($key)) {
        return false;
    }

    if (empty($domain)) {
        if (isset(self::$cookies[$key])) {
            unset(self::$cookies[$key]);
        }
        return true;
    }

    if (!isset(self::$domain_cookies[$domain])) {
        return false;
    }

    if (isset(self::$domain_cookies[$domain][$key])) {
        unset(self::$domain_cookies[$domain][$key]);
    }

    return true;
}
|
|
|
|
/**
 * Remove a whole cookie jar.
 *
 * @param string $domain when empty, the global jar is emptied; otherwise the
 *                       jar of that domain is dropped
 * @return bool false when a domain was given but has no jar, true otherwise
 */
public static function del_cookies($domain = '')
{
    if (empty($domain)) {
        self::$cookies = array();
        return true;
    }

    if (!isset(self::$domain_cookies[$domain])) {
        return false;
    }

    unset(self::$domain_cookies[$domain]);
    return true;
}
|
|
|
|
/**
 * Replace the user-agent pool; request() picks one entry at random.
 *
 * @param string|array $useragent a single user agent or a list of them
 * @return void
 */
public static function set_useragent($useragent)
{
    if (!is_array($useragent)) {
        $useragent = array($useragent);
    }

    self::$useragents = $useragent;
}
|
|
|
|
/**
 * Set the Referer request header.
 *
 * @param string $referer value stored under $rawheaders['Referer']
 * @return void
 */
public static function set_referer($referer)
{
    self::$rawheaders['Referer'] = $referer;
}
|
|
|
|
/**
 * Replace the pool of client IPs to masquerade as.
 *
 * request() picks one entry at random and sends it in the CLIENT-IP and
 * X-FORWARDED-FOR headers.
 *
 * @param string|array $ip a single IP or a list of IPs
 * @return void
 */
public static function set_client_ip($ip)
{
    if (!is_array($ip)) {
        $ip = array($ip);
    }

    self::$client_ips = $ip;
}
|
|
|
|
/**
 * Empty the pool of client IPs set with set_client_ip().
 *
 * @return void
 */
public static function del_client_ip()
{
    self::$client_ips = array();
}
|
|
|
|
/**
 * Set the Accept-Language request header (Chinese by default).
 *
 * @param string $lang language tag, 'zh-CN' when omitted
 * @return void
 */
public static function set_accept_language($lang = 'zh-CN')
{
    self::$rawheaders['Accept-Language'] = $lang;
}
|
|
|
|
/**
 * Bind a host name to one or more IP addresses.
 *
 * For a bound host, request() picks one of the IPs at random, puts it into
 * the URL in place of the host name and sends the original name in the Host
 * header. The original author recommends this for targets behind a CDN.
 *
 * @param string       $host host name as it appears in request URLs
 * @param string|array $ips  a single IP or a list of IPs
 * @return void
 */
public static function set_hosts($host, $ips = array())
{
    if (!is_array($ips)) {
        $ips = array($ips);
    }

    self::$hosts[$host] = $ips;
}
|
|
|
|
/**
 * Split the raw response into head and body and publish both.
 *
 * The split position is the 'header_size' entry of self::$info. The head goes
 * to self::$head, the untouched body to self::$content and the body returned
 * by encoding() to self::$text. self::$encoding is then set to
 * self::$output_encoding.
 *
 * @return array array(head, converted body)
 */
public static function split_header_body()
{
    $header_size = self::$info['header_size'];

    // Response head.
    self::$head = substr(self::$raw, 0, $header_size);
    // Body exactly as received, before charset conversion.
    self::$content = substr(self::$raw, $header_size);

    // Convert to the configured output charset. Per the original author,
    // xpath only supports UTF-8, and ISO-8859-1 input is treated as UTF-8.
    $converted = self::encoding(self::$content);
    self::$encoding = self::$output_encoding;

    // Body after charset conversion.
    self::$text = $converted;

    return array(self::$head, $converted);
}
|
|
|
|
/**
 * Extract cookies from response headers and store them for a domain.
 *
 * Only the first "name=value" pair of each Set-Cookie line is a cookie;
 * everything after the first ';' is an attribute (Path, Domain, Expires,
 * Max-Age, SameSite, Secure, HttpOnly, ...) and is ignored. Parsing each
 * header line separately avoids storing attributes as if they were cookies.
 *
 * @param string $header raw response head (may contain several header blocks)
 * @param string $domain domain whose jar in self::$domain_cookies is updated
 * @return void
 */
public static function get_response_cookies($header, $domain)
{
    // Header names are case-insensitive; /m anchors the match to line starts.
    if (!preg_match_all('/^Set-Cookie:[ \t]*([^\r\n]*)/im', $header, $matches)) {
        return;
    }

    foreach ($matches[1] as $set_cookie) {
        $segments = explode(';', $set_cookie, 2);
        $pair = explode('=', $segments[0], 2);
        // A leading segment without '=' carries no usable cookie.
        if (count($pair) < 2) {
            continue;
        }

        $cookie_name = trim($pair[0]);
        if ($cookie_name === '') {
            continue;
        }

        self::$domain_cookies[$domain][$cookie_name] = trim($pair[1]);
    }
}
|
|
|
|
/**
 * Parse a raw response head into a name => value map.
 *
 * Lines without a ':' (such as the status line) and lines with an empty name
 * or value are skipped. When a name occurs more than once, the last value
 * wins. The result is also stored in self::$headers. request() calls this for
 * every response.
 *
 * @param string $header raw response head
 * @return array header name => value
 */
public static function get_response_headers($header)
{
    $headers = array();

    foreach (explode("\n", $header) as $line) {
        $header_arr = explode(':', $line, 2);
        if (count($header_arr) < 2) {
            continue;
        }

        $key = trim($header_arr[0]);
        $val = trim($header_arr[1]);
        // Compare against '' rather than using empty(), so a legitimate "0"
        // value such as "Content-Length: 0" is kept.
        if ($key === '' || $val === '') {
            continue;
        }

        $headers[$key] = $val;
    }

    self::$headers = $headers;

    return self::$headers;
}
|
|
|
|
/**
 * Guess the character encoding of a string.
 *
 * @param string $string text to inspect
 * @return string lower-cased result of mb_detect_encoding() over a fixed
 *                candidate list
 */
public static function get_encoding($string)
{
    $candidates = array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1');
    $detected = mb_detect_encoding($string, $candidates);

    return strtolower($detected);
}
|
|
|
|
/**
 * Replace the <head>...</head> section of a page with an empty <head></head>.
 *
 * The opening tag is matched as "<head" followed by a word boundary and
 * optional attributes, so "<header>" is not mistaken for it. The content may
 * be empty or a single tag.
 *
 * @param string $html page markup
 * @return string|null markup with the head section emptied, or null if the
 *                     regex engine fails
 */
private static function _remove_head($html)
{
    return preg_replace('/<head\b[^>]*>.*?<\/head>/is', '<head></head>', $html);
}
|
|
|
|
/**
 * Rough check that a string contains something shaped like a URL.
 *
 * The pattern accepts any "scheme:/" or "scheme://" prefix as well as a bare
 * "www." prefix. Restricting the scheme to http/https is left to the caller.
 *
 * @param string $url candidate string
 * @return boolean true when the pattern matches, false otherwise
 */
private static function _is_url($url)
{
    $pattern = "/\b(([\w-]+:\/\/?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|\/)))/";

    return (bool) preg_match($pattern, $url);
}
|
|
|
|
/**
 * Create the shared cURL handle if there is no usable one yet.
 *
 * A new handle gets the base options that do not depend on the individual
 * request: return the transfer as a string, default user agent, timeouts,
 * redirect limit and CURLOPT_NOSIGNAL.
 *
 * @return mixed the cURL handle stored in self::$ch
 */
public static function init()
{
    if (is_resource(self::$ch)) {
        return self::$ch;
    }

    self::$ch = curl_init();
    curl_setopt(self::$ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt(self::$ch, CURLOPT_HEADER, false);
    curl_setopt(self::$ch, CURLOPT_USERAGENT, "phpspider-requests/" . self::VERSION);

    if (is_array(self::$timeout)) {
        // Two values: connect timeout first, total timeout second.
        curl_setopt(self::$ch, CURLOPT_CONNECTTIMEOUT, self::$timeout[0]);
        curl_setopt(self::$ch, CURLOPT_TIMEOUT, self::$timeout[1]);
    } else {
        // One value: it is the total timeout; half of it (rounded up) is
        // allowed for connecting.
        curl_setopt(self::$ch, CURLOPT_CONNECTTIMEOUT, ceil(self::$timeout / 2));
        curl_setopt(self::$ch, CURLOPT_TIMEOUT, self::$timeout);
    }

    // Maximum number of redirects allowed.
    curl_setopt(self::$ch, CURLOPT_MAXREDIRS, 5);
    // Keeps cURL from using signal handlers for timeouts, which matters in
    // multi-threaded use. The original author notes that rare crashes were
    // still seen.
    curl_setopt(self::$ch, CURLOPT_NOSIGNAL, true);

    return self::$ch;
}
|
|
|
|
/**
 * Send a GET request.
 *
 * @param string $url             target URL
 * @param array  $fields          query parameters appended to the URL
 * @param bool   $allow_redirects follow redirects when true
 * @param mixed  $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function get($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    $response = self::request($url, 'get', $fields, NULL, $allow_redirects, $cert);

    return $response;
}
|
|
|
|
/**
 * Send a POST request.
 *
 * $fields may take three forms:
 *   1. an array:            array('name' => 'yangzetao')
 *   2. an HTTP query string: http_build_query(array('name' => 'yangzetao'))
 *   3. a JSON string:        json_encode(array('name' => 'yangzetao'))
 * The first two are ordinary form posts that the server reads from $_POST.
 * The third is a raw body (JSON-RPC style) that the server has to read from
 * the input stream (php://input or $HTTP_RAW_POST_DATA).
 *
 * @param string       $url             target URL
 * @param array|string $fields          request body, see above
 * @param array        $files           form field name => local file path
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function post($url, $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    $response = self::request($url, 'POST', $fields, $files, $allow_redirects, $cert);

    return $response;
}
|
|
|
|
/**
 * Send a PUT request.
 *
 * @param string       $url             target URL
 * @param array|string $fields          request data
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function put($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    // request() takes $files as its fourth argument; an empty list keeps
    // $allow_redirects and $cert in their own positions.
    return self::request($url, 'PUT', $fields, array(), $allow_redirects, $cert);
}
|
|
|
|
/**
 * Send a DELETE request.
 *
 * @param string       $url             target URL
 * @param array|string $fields          request data
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function delete($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    // request() takes $files as its fourth argument; an empty list keeps
    // $allow_redirects and $cert in their own positions.
    return self::request($url, 'DELETE', $fields, array(), $allow_redirects, $cert);
}
|
|
|
|
/**
 * Send a HEAD request.
 *
 * HEAD asks for the response headers only, without the entity body. It is
 * commonly used to test whether a link is valid, reachable or recently
 * changed.
 *
 * @param string       $url             target URL
 * @param array|string $fields          request data
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function head($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    // request() takes $files as its fourth argument; an empty list keeps
    // $allow_redirects and $cert in their own positions. The result is
    // returned like in the sibling verb helpers.
    return self::request($url, 'HEAD', $fields, array(), $allow_redirects, $cert);
}
|
|
|
|
/**
 * Send an OPTIONS request.
 *
 * @param string       $url             target URL
 * @param array|string $fields          request data
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function options($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    // request() takes $files as its fourth argument; an empty list keeps
    // $allow_redirects and $cert in their own positions.
    return self::request($url, 'OPTIONS', $fields, array(), $allow_redirects, $cert);
}
|
|
|
|
/**
 * Send a PATCH request.
 *
 * @param string       $url             target URL
 * @param array|string $fields          request data
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            forwarded to request()
 * @return mixed whatever request() returns
 */
public static function patch($url, $fields = array(), $allow_redirects = true, $cert = NULL)
{
    self::init();
    // request() takes $files as its fourth argument; an empty list keeps
    // $allow_redirects and $cert in their own positions.
    return self::request($url, 'PATCH', $fields, array(), $allow_redirects, $cert);
}
|
|
|
|
/**
 * Send an HTTP request through the shared cURL handle.
 *
 * The outcome is published in the static properties: $raw, $info,
 * $status_code, $error, $head, $content, $text, $history, $headers and the
 * per-domain cookie jar.
 *
 * Headers that belong to one request only (the Host header for a bound host,
 * X-HTTP-Method-Override) are added to a per-call copy of the header list, so
 * they do not leak into later requests. The chosen user agent and client IP
 * are still written to self::$rawheaders.
 *
 * @param string       $url             request URL (http or https)
 * @param string       $method          HTTP method, case-insensitive
 * @param array|string $fields          query parameters for GET; request body
 *                                      for other methods (not sent for HEAD)
 * @param array        $files           POST only: form field name => local
 *                                      file path; missing files are skipped
 * @param bool         $allow_redirects follow redirects when true
 * @param mixed        $cert            CA certificate; accepted for interface
 *                                      compatibility but not used here
 * @return string|false the body after charset conversion, or false when the
 *                      URL is rejected (see self::$error)
 */
public static function request($url, $method = 'GET', $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL)
{
    $method = strtoupper($method);
    // Clear the error of an earlier call so it is not mistaken for the
    // result of this one.
    self::$error = '';

    if (!self::_is_url($url)) {
        self::$error = "You have requested URL ({$url}) is not a valid HTTP address";
        return false;
    }

    // GET parameters travel in the query string.
    if ($method == 'GET' && !empty($fields)) {
        $query = is_array($fields) ? http_build_query($fields) : $fields;
        $url = $url . (strpos($url, '?') === false ? '?' : '&') . $query;
    }

    $parse_url = parse_url($url);
    if (empty($parse_url) || empty($parse_url['host']) || empty($parse_url['scheme']) || !in_array($parse_url['scheme'], array('http', 'https'))) {
        self::$error = "No connection adapters were found for '{$url}'";
        return false;
    }
    $scheme = $parse_url['scheme'];
    $domain = $parse_url['host'];

    // request() is public: make sure a handle exists even when the caller
    // did not go through get()/post()/... first.
    if (!is_resource(self::$ch) && !is_object(self::$ch)) {
        self::init();
    }

    // Headers that apply to this call only.
    $request_headers = array();

    // Host binding: pick one of the bound IPs at random.
    if (!empty(self::$hosts[$domain])) {
        $hosts = array_values(self::$hosts[$domain]);
        $ip = $hosts[rand(0, count($hosts) - 1)];
        // Replace the host part only; the same text may also occur in the
        // path or query string.
        $pos = strpos($url, $domain, strpos($url, '://') + 3);
        if ($pos !== false) {
            $url = substr_replace($url, $ip, $pos, strlen($domain));
        }
        $request_headers['Host'] = $domain;
    }

    curl_setopt(self::$ch, CURLOPT_URL, $url);

    if ($method == 'POST') {
        $tmpheaders = array_change_key_case(self::$rawheaders, CASE_LOWER);
        // Some RESTful services only accept a raw JSON body.
        // CURLOPT_CUSTOMREQUEST sends the body as given, while CURLOPT_POST
        // makes cURL handle it as a form post.
        if (isset($tmpheaders['content-type']) && $tmpheaders['content-type'] == 'application/json') {
            curl_setopt(self::$ch, CURLOPT_CUSTOMREQUEST, $method);
        } else {
            curl_setopt(self::$ch, CURLOPT_POST, true);
        }

        $file_fields = array();
        if (!empty($files) && is_array($files)) {
            foreach ($files as $postname => $file) {
                $filepath = realpath($file);
                // Skip files that do not exist.
                if (!file_exists($filepath)) {
                    continue;
                }

                $filename = basename($filepath);
                $type = self::get_mimetype($filepath);
                $file_fields[$postname] = curl_file_create($filepath, $type, $filename);
            }
        }

        if (empty($file_fields)) {
            // No upload: a query string is more compatible and smaller than
            // a multipart body. Per the original author, passing the array
            // directly was also very slow.
            if (is_array($fields)) {
                $fields = http_build_query($fields);
            } elseif ($fields === null) {
                $fields = '';
            }
        } elseif (is_array($fields) && !empty($fields)) {
            // Per the original author, some servers may have trouble with
            // mixed form and file fields.
            $fields = array_merge($fields, $file_fields);
        } else {
            $fields = $file_fields;
        }

        curl_setopt(self::$ch, CURLOPT_POSTFIELDS, $fields);
    } elseif ($method != 'GET') {
        $request_headers['X-HTTP-Method-Override'] = $method;
        curl_setopt(self::$ch, CURLOPT_CUSTOMREQUEST, $method);

        if ($method == 'HEAD') {
            // Without NOBODY, cURL waits for a body a HEAD response never
            // carries.
            curl_setopt(self::$ch, CURLOPT_NOBODY, true);
        } elseif (!empty($fields)) {
            // PUT/PATCH/DELETE/OPTIONS: send the given data as the body.
            curl_setopt(self::$ch, CURLOPT_POSTFIELDS, is_array($fields) ? http_build_query($fields) : $fields);
        }
    }

    // Global cookies first, then the cookies of this domain on top. Assigning
    // key by key (instead of array_merge) keeps numeric cookie names intact.
    $cookies = self::get_cookies();
    foreach (self::get_cookies($domain) as $key => $value) {
        $cookies[$key] = $value;
    }
    if (!empty($cookies)) {
        $cookie_arr = array();
        foreach ($cookies as $key => $value) {
            $cookie_arr[] = $key . '=' . $value;
        }
        curl_setopt(self::$ch, CURLOPT_COOKIE, implode('; ', $cookie_arr));
    }

    if (!empty(self::$useragents)) {
        $key = rand(0, count(self::$useragents) - 1);
        self::$rawheaders['User-Agent'] = self::$useragents[$key];
    }

    if (!empty(self::$client_ips)) {
        $key = rand(0, count(self::$client_ips) - 1);
        self::$rawheaders['CLIENT-IP'] = self::$client_ips[$key];
        self::$rawheaders['X-FORWARDED-FOR'] = self::$client_ips[$key];
    }

    // Persistent headers plus the per-request ones.
    $send_headers = self::$rawheaders;
    foreach ($request_headers as $k => $v) {
        $send_headers[$k] = $v;
    }
    if ($send_headers) {
        $http_headers = array();
        foreach ($send_headers as $k => $v) {
            $http_headers[] = $k . ': ' . $v;
        }
        curl_setopt(self::$ch, CURLOPT_HTTPHEADER, $http_headers);
    }

    curl_setopt(self::$ch, CURLOPT_ENCODING, 'gzip');

    // NOTE(review): certificate and host verification are switched off for
    // https, which allows man-in-the-middle attacks. Kept as in the original.
    if ($scheme == 'https') {
        curl_setopt(self::$ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt(self::$ch, CURLOPT_SSL_VERIFYHOST, false);
    }

    if (self::$proxies) {
        $key = rand(0, count(self::$proxies) - 1);
        $proxy = self::$proxies[$key];
        curl_setopt(self::$ch, CURLOPT_PROXY, $proxy);
    }

    // Ask for head + body; the head carries the cookies.
    curl_setopt(self::$ch, CURLOPT_HEADER, true);
    // Return the content found after following redirects.
    if ($allow_redirects) {
        curl_setopt(self::$ch, CURLOPT_FOLLOWLOCATION, true);
    }

    self::$raw = curl_exec(self::$ch);
    self::$info = curl_getinfo(self::$ch);
    self::$status_code = self::$info['http_code'];
    if (self::$raw === false) {
        self::$error = 'Curl error: ' . curl_error(self::$ch);
    }

    // Close the handle and drop the reference, so the next request starts
    // from a fresh handle without this request's options.
    curl_close(self::$ch);
    self::$ch = null;

    list($header, $text) = self::split_header_body();
    self::$history = self::get_history($header);
    self::$headers = self::get_response_headers($header);
    self::get_response_cookies($header, $domain);

    return $text;
}
|
|
|
|
/**
 * Find the redirect status that preceded the final response.
 *
 * With redirects followed, the raw head contains one status line per hop.
 * The status line is matched by its numeric code (301, 302, 303, 307 or 308)
 * and not by its reason phrase, so "301 Moved Permanently" and HTTP/2 status
 * lines without a reason phrase are recognised as well.
 *
 * @param string $header raw response head, possibly several header blocks
 * @return int the last redirect status found, or 0 when there was none
 */
public static function get_history($header)
{
    $status_code = 0;

    foreach (explode("\n", $header) as $line) {
        if (preg_match('#^HTTP/\S+\s+(30[12378])\b#', trim($line), $out)) {
            $status_code = intval($out[1]);
        }
    }

    return $status_code;
}
|
|
|
|
/**
 * Determine the MIME type of a file with the fileinfo extension.
 *
 * FILEINFO_MIME yields "type; charset=..."; only the part before the first
 * ';' is returned.
 *
 * @param string $filepath path of the file to inspect
 * @return string the MIME type, or '' when none could be determined
 */
public static function get_mimetype($filepath)
{
    $finfo = finfo_open(FILEINFO_MIME);
    $mime = finfo_file($finfo, $filepath);
    finfo_close($finfo);

    $parts = explode(';', $mime);
    if (empty($parts[0])) {
        return '';
    }

    return $parts[0];
}
|
|
|
|
/**
 * Build a multipart/form-data body from form fields and files by hand.
 *
 * The original author marks this helper as currently unused.
 *
 * Content-Disposition parameters are written as name="value", with no
 * whitespace around '='. The matching request header would be
 * "Content-Type: multipart/form-data; boundary=<delimiter>".
 *
 * @param array $post_fields field name => value
 * @param array $file_fields field name => array with the keys 'filename',
 *                           'type' and 'content'
 * @return array array(delimiter, body)
 */
public static function get_postfile_form($post_fields, $file_fields)
{
    $data = '';
    $delimiter = '-------------' . uniqid();

    // Plain form fields.
    foreach ($post_fields as $name => $content) {
        $data .= '--' . $delimiter . "\r\n";
        $data .= 'Content-Disposition: form-data; name="' . $name . '"';
        $data .= "\r\n\r\n";
        $data .= $content;
        $data .= "\r\n";
    }

    // File fields.
    foreach ($file_fields as $input_name => $file) {
        $data .= '--' . $delimiter . "\r\n";
        $data .= 'Content-Disposition: form-data; name="' . $input_name . '";' .
            ' filename="' . $file['filename'] . '"' . "\r\n";
        $data .= "Content-Type: {$file['type']}\r\n";
        $data .= "\r\n";
        $data .= $file['content'];
        $data .= "\r\n";
    }

    // Closing boundary.
    $data .= '--' . $delimiter . "--\r\n";

    return array($delimiter, $data);
}
|
|
|
|
/**
 * Convert an HTML document to the output charset and rewrite its meta tag.
 *
 * The source charset is taken from $in, else from the first
 * <meta ... charset=...> tag, else guessed with mb_detect_encoding(). When no
 * source charset can be determined, or the conversion fails, the document is
 * returned unchanged. A source charset of ISO-8859-1 is treated as UTF-8, as
 * in the original implementation.
 *
 * @param string $html document to convert
 * @param string $in   source charset; detected when null
 * @param string $out  target charset; self::$output_encoding takes precedence
 *                     when set, and the default is UTF-8
 * @param string $mode auto|iconv|mb_convert_encoding; auto prefers iconv
 * @return string converted document
 * @throws \Exception for an unknown $mode, or when the required conversion
 *                    function is not available
 */
public static function encoding($html, $in = null, $out = null, $mode = 'auto')
{
    $valid = array(
        'auto',
        'iconv',
        'mb_convert_encoding',
    );
    if (isset(self::$output_encoding)) {
        $out = self::$output_encoding;
    }
    if (!isset($out)) {
        $out = 'UTF-8';
    }
    // This file lives in a namespace, so the global Exception class must be
    // fully qualified.
    if (!in_array($mode, $valid)) {
        throw new \Exception('invalid mode, mode=' . $mode);
    }

    $use_iconv = function_exists('iconv') && ($mode == 'auto' || $mode == 'iconv');
    $use_mb = function_exists('mb_convert_encoding') && ($mode == 'auto' || $mode == 'mb_convert_encoding');
    if (!$use_iconv && !$use_mb) {
        throw new \Exception('charsetTrans failed, no function');
    }

    $pattern = '/(<meta[^>]*?charset=([\"\']?))([a-z\d_\-]*)(\2[^>]*?>)/is';
    if (!isset($in)) {
        $in = preg_match($pattern, $html, $meta) > 0 ? $meta[3] : null;
        if (empty($in) && function_exists('mb_detect_encoding')) {
            $in = mb_detect_encoding($html, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1'));
        }
    }

    // Detection can yield '' or false; there is nothing to convert from then.
    if (empty($in)) {
        return $html;
    }

    // Charset names are case-insensitive.
    if (strtoupper($in) == 'ISO-8859-1') {
        $in = 'UTF-8';
    }

    $old = error_reporting(error_reporting() & ~E_NOTICE);
    if ($use_iconv) {
        $converted = iconv($in, $out . '//IGNORE', $html);
    } else {
        // mb_convert_encoding() takes (string, to, from) and does not
        // understand the iconv-only "//IGNORE" suffix.
        $converted = mb_convert_encoding($html, $out, $in);
    }
    error_reporting($old);

    // Keep the original document rather than wiping it on a failed
    // conversion.
    if ($converted === false) {
        return $html;
    }

    // ${1}/${4} keep the group references unambiguous whatever $out starts
    // with.
    return preg_replace($pattern, '${1}' . $out . '${4}', $converted, 1);
}
|
|
}
|
|
|