e__B49#rO&;Do6O9iu*j@p_(P3!kCq{zDuV5%b7-H0teYx%>*@3y_%6kt4W++=B9yOPV5e&jTFzsV^M-E&MU)?9d0gUYe-%
zJye0l*m&l5i4piuJ7AbN$+j4+33)KYxk9-fZJ1ADnKFMsdRc`z0^JTo0l^2l*ed(<
zGC+8)+=rw~hG26F<(%UnSM^Bh-k}qhqz-Sv24=3+*RcqT=e=GKabxiBPc*I;sJqhB
z!H38cFz%ftc-P@L=&GBb!(2+G!FlWK@N1_OHI1T->+EXTH;;Jy23y+QQZN3EY?5rr
z?YU-fDn>W}r;v08%TWK+8*t<2MquQA2Pn6J8k(A$*Q4G4@u(Z#sHZuBXVk-7YMsej
z!p*84;ULPy#j_>q`KfZS@@WpZPVp|dDbGw&+}oGBmJUv|9{~97X==|wKaax;L3_`g
z5eB)e6XG|}FdG6$o*~Oj87%3J4x?ishRN+1PlbT}ZnnQ&Vi%=sYk|9E<`i^|N
zEYN@9oI4VTs1#K`gXxrgeTmwl*|ecJ42O9hb8J(L&?fvR!e6V&&om#kF$3}`sDv@Z
z${qcI71i0pVaA5$MRpFqVKX2%0Gy8jxx9jToGFCzg2v@H!6!BitnKG#u*#}qTsfIS
z-VadPk1}#m`CS5Wz;VEqM|7!%HO?#E$_v8&swtw@Iwaw#^u+~nYMFm(6TMfgu3GRA
zzNXU2_4Nx8^q`E4rajlMHXZrlhF{?cvoL^6(J{38AsOKdw#$5VlkiVMen%;(c>5|}
z-8wIqOC%Bs@9G2ohvyr@4wHgS_1j87KyeGQ1M=7~ow(eepyR9ahxTHu>5Qadxh?;X
zY%}GS$r6xr*@q}OU__LNPmwf~7ktWC89X4(f(_#JGkprClZq4PC2GxB1UOI$!7X!T
zJ8->J`tJJ=jr4U?DIx@>c6Q1UxQy^kV_nl7Zn)Zk?_Rzch2=XEn-lO8Et7aqKqqS+
zh!A3WpfHAOdkeCVu%I*=gd+|`kc-g|$8aU9FEqneo%Pgqh3f4<0lcZi?QX&s&Z2-k
zfoaPNPuo)MxRbK}1&&L`i~5*BLqlxsns`UM&R1!!GYW87tR98k7+6^U8t4J1P8QjK
zG5D8A8q`(?`?gY2Eji~EAB&zFeml!;J2d-I4=ww8Q*`{TtFRBnCvl1A|5^0Z4io2O
zz))rySardv1~AoDDDXnVsEdQ}exb|MS7fUvnrrG=0>f@;1+L4ifD6}c4cHg#e60Qp
zS`W(gqt?1Jq&sC7gIR0zGu?Y7E?xabUZw~h*{Mf)Mdh{F%iQM)vaP`+}3
z2|mS4HbeKcCO0R8WYXK1+Jj#o^}Swggv|hy+inEd4Q2k@hK@6^S*`v26~9A4roC1S
zbov4{)|I#WtM6M=n6-;zPD&%jGw3sZCu(q~>a`;K5c2k&h=X7S*!x(JhvWfIH2}pN
z2Q)>gs9=%&J?C0O<#K^v?l=jByN<7s=uD;!iLa?Tq^olkkzBv4sYRhJjC1Rpc7kD2
zauhm$4&?sW-GDN*b@-XYTSFlc4@D-nEYA+xtrhMsiy&WGStJx@^4Kf#D)5@zhq*n8
zpQB#&{|ciTbqysH6l~`CubSOoDcASo+dOTDz?woVVd|dOkB)B*6t1|zC?Rq30GY9$zNwXGcb(|I35qxAtQ<+VV7ip~5VSUqIBTnUp0-iTXxw
z5(e>54wk|7oLY!ipq@8EU{u(juMrC1bfPY*-l*~w3~{9My`H|nx*Hiemt+~!W`j3k
zJQL2Bx+F(y?X<<}gwz1#iEQc-4%hPFP;I#HFZRBkizM>uF{I_T
zsAwtOEH&(l@=OsK+46+-SiLrDOVT&nzlI9HYB8I3zRt^BuXF+by@7iwcQexd4;dX$
A@c;k-
literal 0
HcmV?d00001
diff --git a/vendor/curl/curl/tests/server/Dockerfile b/vendor/curl/curl/tests/server/Dockerfile
new file mode 100644
index 0000000..96689f8
--- /dev/null
+++ b/vendor/curl/curl/tests/server/Dockerfile
@@ -0,0 +1,9 @@
+FROM alpine:3.7
+
+RUN apk add --no-cache php5-cli php5-curl php5-gd php5-phar php5-json php5-openssl php5-dom
+
+COPY php-curl-test php-curl-test
+
+EXPOSE 80
+
+CMD ["php5", "-S", "0.0.0.0:80", "-t", "php-curl-test"]
diff --git a/vendor/curl/curl/tests/server/php-curl-test/deploy.php b/vendor/curl/curl/tests/server/php-curl-test/deploy.php
new file mode 100644
index 0000000..0d8bdab
--- /dev/null
+++ b/vendor/curl/curl/tests/server/php-curl-test/deploy.php
@@ -0,0 +1,37 @@
+\$ {$command}\n";
+ $output .= htmlentities(trim($tmp)) . "\n";
+}
+
+// Make it pretty for manual user access (and why not?)
+?>
+
+
+
+
+ GIT DEPLOYMENT SCRIPT
+
+
+
+. ____ . ____________________________
+|/ \| | |
+[| ♥ ♥ |] | Git Deployment Script v0.1 |
+|___==___| / © oodavid 2012 |
+|____________________________|
+
+
+
+
+
\ No newline at end of file
diff --git a/vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php b/vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php
new file mode 100644
index 0000000..336fb0c
--- /dev/null
+++ b/vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php
@@ -0,0 +1,14 @@
+ $_SERVER['PHP_AUTH_USER'],
+ 'password' => $_SERVER['PHP_AUTH_PW'],
+));
\ No newline at end of file
diff --git a/vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php b/vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php
new file mode 100644
index 0000000..aa54477
--- /dev/null
+++ b/vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php
@@ -0,0 +1,21 @@
+ '_COOKIE',
+ 'delete' => '_GET',
+ 'post' => '_POST',
+ 'put' => '_GET',
+ 'server' => '_SERVER',
+);
+
+if(isset($data_mapping[$test])) {
+ $data = ${$data_mapping[$test]};
+ $value = isset($data[$key]) ? $data[$key] : '';
+echo $value;
+} else {
+ echo "Error.";
+}
diff --git a/vendor/owner888/phpspider/README.md b/vendor/owner888/phpspider/README.md
new file mode 100644
index 0000000..0732cb2
--- /dev/null
+++ b/vendor/owner888/phpspider/README.md
@@ -0,0 +1,52 @@
+# phpspider -- PHP蜘蛛爬虫框架
+《我用爬虫一天时间“偷了”知乎一百万用户,只为证明PHP是世界上最好的语言 》所使用的程序
+
+phpspider是一个爬虫开发框架。使用本框架,你不用了解爬虫的底层技术实现,也不必自己处理爬虫被网站屏蔽、有些网站需要登录或验证码识别才能爬取等问题。简单几行PHP代码,就可以创建自己的爬虫;利用框架封装的多进程Worker类库,代码更简洁,执行效率更高,速度更快。
+
+demo目录下有一些特定网站的爬取规则,只要你安装了PHP环境,代码就可以在命令行下直接跑。 对爬虫感兴趣的开发者可以加QQ群一起讨论:147824717。
+
+下面以糗事百科为例, 来看一下我们的爬虫长什么样子:
+
+```
+$configs = array(
+ 'name' => '糗事百科',
+ 'domains' => array(
+ 'qiushibaike.com',
+ 'www.qiushibaike.com'
+ ),
+ 'scan_urls' => array(
+ 'http://www.qiushibaike.com/'
+ ),
+ 'content_url_regexes' => array(
+ "http://www.qiushibaike.com/article/\d+"
+ ),
+ 'list_url_regexes' => array(
+ "http://www.qiushibaike.com/8hr/page/\d+\?s=\d+"
+ ),
+ 'fields' => array(
+ array(
+ // 抽取内容页的文章内容
+ 'name' => "article_content",
+ 'selector' => "//*[@id='single-next-link']",
+ 'required' => true
+ ),
+ array(
+ // 抽取内容页的文章作者
+ 'name' => "article_author",
+ 'selector' => "//div[contains(@class,'author')]//h2",
+ 'required' => true
+ ),
+ ),
+);
+$spider = new phpspider($configs);
+$spider->start();
+```
+爬虫的整体框架就是这样, 首先定义了一个$configs数组, 里面设置了待爬网站的一些信息, 然后通过调用```$spider = new phpspider($configs);```和```$spider->start();```来配置并启动爬虫.
+
+#### 运行界面如下:
+
+
+
+更多详细内容,移步到:
+
+[开发文档](http://doc.phpspider.org)
diff --git a/vendor/owner888/phpspider/autoloader.php b/vendor/owner888/phpspider/autoloader.php
new file mode 100644
index 0000000..6870644
--- /dev/null
+++ b/vendor/owner888/phpspider/autoloader.php
@@ -0,0 +1,77 @@
+
+ * @copyright seatle
+ * @link http://www.phpspider.org/
+ * @license http://www.opensource.org/licenses/mit-license.php MIT License
+ */
+namespace phpspider;
+
+/**
+ * autoloader.
+ */
+class autoloader
+{
+ /**
+ * Autoload root path.
+ *
+ * @var string
+ */
+ protected static $_autoload_root_path = '';
+
+ /**
+ * Set autoload root path.
+ *
+ * @param string $root_path
+ * @return void
+ */
+ public static function set_root_path($root_path)
+ {
+ self::$_autoload_root_path = $root_path;
+ }
+
+ /**
+ * Load files by namespace.
+ *
+ * @param string $name
+ * @return boolean
+ */
+ public static function load_by_namespace($name)
+ {
+ $class_path = str_replace('\\', DIRECTORY_SEPARATOR, $name);
+
+ if (strpos($name, 'phpspider\\') === 0)
+ {
+ $class_file = __DIR__ . substr($class_path, strlen('phpspider')) . '.php';
+ }
+ else
+ {
+ if (self::$_autoload_root_path)
+ {
+ $class_file = self::$_autoload_root_path . DIRECTORY_SEPARATOR . $class_path . '.php';
+ }
+ if (empty($class_file) || !is_file($class_file))
+ {
+ $class_file = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . "$class_path.php";
+ }
+ }
+
+ if (is_file($class_file))
+ {
+ require_once($class_file);
+ if (class_exists($name, false))
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+}
+
+spl_autoload_register('\phpspider\autoloader::load_by_namespace');
diff --git a/vendor/owner888/phpspider/composer.json b/vendor/owner888/phpspider/composer.json
new file mode 100644
index 0000000..da533c7
--- /dev/null
+++ b/vendor/owner888/phpspider/composer.json
@@ -0,0 +1,38 @@
+{
+ "name": "owner888/phpspider",
+ "type": "library",
+ "keywords": [
+ "framework",
+ "phpspider"
+ ],
+ "homepage": "http://www.phpspider.org",
+ "license": "MIT",
+ "description": "The PHPSpider Framework.",
+ "authors": [
+ {
+ "name": "Seatle Yang",
+ "email": "seatle@foxmail.com",
+ "homepage": "http://www.phpspider.org",
+ "role": "Developer"
+ }
+ ],
+ "support": {
+ "email": "seatle@foxmail.com",
+ "issues": "https://github.com/owner888/phpspider/issues",
+ "forum": "http://wenda.phpspider.org/",
+ "wiki": "http://doc.phpspider.org/",
+ "source": "https://github.com/owner888/phpspider"
+ },
+ "require": {
+ "php": ">=5.5.0"
+ },
+ "suggest": {
+ "ext-pcntl、ext-redis": "For better performance. "
+ },
+ "autoload": {
+ "psr-4": {
+ "phpspider\\": "./"
+ }
+ },
+ "minimum-stability": "dev"
+}
diff --git a/vendor/owner888/phpspider/core/cache.php b/vendor/owner888/phpspider/core/cache.php
new file mode 100644
index 0000000..a6ed009
--- /dev/null
+++ b/vendor/owner888/phpspider/core/cache.php
@@ -0,0 +1,64 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider缓存类文件
+//----------------------------------
+
+class cache
+{
+ // The singleton pattern cannot be used with multiple processes
+ //protected static $_instance;
+ /**
+ * Create a Redis client configured from $GLOBALS['config']['redis']
+ *
+ * @return Redis|null the configured client, or null when the redis extension is missing or authentication fails
+ * @author seatle
+ * @created time :2016-04-10 22:55
+ */
+ public static function init()
+ {
+ if(extension_loaded('Redis'))
+ {
+ $_instance = new Redis();
+ }
+ else
+ {
+ $errmsg = "extension redis is not installed";
+ log::add($errmsg, "Error");
+ return null;
+ }
+ // Do not use pconnect here; it fails with: Uncaught exception 'RedisException' with message 'read error on connection'
+ $_instance->connect($GLOBALS['config']['redis']['host'], $GLOBALS['config']['redis']['port'], $GLOBALS['config']['redis']['timeout']);
+
+ // Authenticate only when a password is configured
+ if ($GLOBALS['config']['redis']['pass'])
+ {
+ if ( !$_instance->auth($GLOBALS['config']['redis']['pass']) )
+ {
+ $errmsg = "Redis Server authentication failed!!";
+ log::add($errmsg, "Error");
+ return null;
+ }
+ }
+
+ // Without a serializer arrays cannot be stored, and PHP-serialized data cannot be read from other languages, so this project JSON-encodes values itself (reported by the author as 1.4x faster than PHP serialization)
+ //$_instance->setOption(Redis::OPT_SERIALIZER, Redis::SERIALIZER_NONE); // don't serialize data
+ //$_instance->setOption(Redis::OPT_SERIALIZER, Redis::SERIALIZER_PHP); // use built-in serialize/unserialize
+ //$_instance->setOption(Redis::OPT_SERIALIZER, Redis::SERIALIZER_IGBINARY); // use igBinary serialize/unserialize
+
+ $_instance->setOption(Redis::OPT_PREFIX, $GLOBALS['config']['redis']['prefix'] . ":");
+
+ return $_instance;
+ }
+}
+
+
diff --git a/vendor/owner888/phpspider/core/constants.php b/vendor/owner888/phpspider/core/constants.php
new file mode 100644
index 0000000..2d6dd4e
--- /dev/null
+++ b/vendor/owner888/phpspider/core/constants.php
@@ -0,0 +1,55 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider公共入口文件
+//----------------------------------
+
+//namespace phpspider\core;
+
+// Display errors.
+ini_set('display_errors', 'on');
+// Reporting all.
+error_reporting(E_ALL);
+
+// 永不超时
+ini_set('max_execution_time', 0);
+set_time_limit(0);
+// 内存限制,如果外面设置的内存比 /etc/php/php-cli.ini 大,就不要设置了
+if (intval(ini_get("memory_limit")) < 1024)
+{
+ ini_set('memory_limit', '1024M');
+}
+
+if( PHP_SAPI != 'cli' )
+{
+ exit("You must run the CLI environment\n");
+}
+
+// Date.timezone
+if (!ini_get('date.timezone'))
+{
+ date_default_timezone_set('Asia/Shanghai');
+}
+
+//核心库目录
+define('CORE', dirname(__FILE__));
+define('PATH_ROOT', CORE."/../");
+define('PATH_DATA', CORE."/../data");
+define('PATH_LIBRARY', CORE."/../library");
+
+//系统配置
+//if( file_exists( PATH_ROOT."/config/inc_config.php" ) )
+//{
+ //require PATH_ROOT."/config/inc_config.php";
+//}
+
+
diff --git a/vendor/owner888/phpspider/core/db.php b/vendor/owner888/phpspider/core/db.php
new file mode 100644
index 0000000..f891a9d
--- /dev/null
+++ b/vendor/owner888/phpspider/core/db.php
@@ -0,0 +1,579 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider数据库类文件
+//----------------------------------
+
+namespace phpspider\core;
+
+class db
+{
+ private static $configs = array();
+ private static $rsid;
+ private static $links = array();
+ private static $link_name = 'default';
+ private static $autocommiting = false;
+
+ public static function _init()
+ {
+ // 获取配置
+ $config = self::$link_name == 'default' ? self::_get_default_config() : self::$configs[self::$link_name];
+
+ // 创建连接
+ if (empty(self::$links[self::$link_name]) || empty(self::$links[self::$link_name]['conn']))
+ {
+ // 第一次连接,初始化fail和pid
+ if (empty(self::$links[self::$link_name]))
+ {
+ self::$links[self::$link_name]['fail'] = 0;
+ self::$links[self::$link_name]['pid'] = function_exists('posix_getpid') ? posix_getpid() : 0;
+ //echo "progress[".self::$links[self::$link_name]['pid']."] create db connect[".self::$link_name."]\n";
+ }
+ self::$links[self::$link_name]['conn'] = mysqli_connect($config['host'], $config['user'], $config['pass'], $config['name'], $config['port']);
+ if(mysqli_connect_errno())
+ {
+ self::$links[self::$link_name]['fail']++;
+ $errmsg = 'Mysql Connect failed['.self::$links[self::$link_name]['fail'].']: ' . mysqli_connect_error();
+ echo util::colorize(date("H:i:s") . " {$errmsg}\n\n", 'fail');
+ log::add($errmsg, "Error");
+ // 连接失败5次,中断进程
+ if (self::$links[self::$link_name]['fail'] >= 5)
+ {
+ exit(250);
+ }
+ self::_init($config);
+ }
+ else
+ {
+ mysqli_query(self::$links[self::$link_name]['conn'], " SET character_set_connection=utf8, character_set_results=utf8, character_set_client=binary, sql_mode='' ");
+ }
+ }
+ else
+ {
+ $curr_pid = function_exists('posix_getpid') ? posix_getpid() : 0;
+ // 如果父进程已经生成资源就释放重新生成,因为多进程不能共享连接资源
+ if (self::$links[self::$link_name]['pid'] != $curr_pid)
+ {
+ self::clear_link();
+ }
+ }
+ }
+
+ /**
+ * 重新设置连接
+ * 传空的话就等于关闭数据库再连接
+ * 在多进程环境下如果主进程已经调用过了,子进程一定要调用一次 clear_link,否则会报错:
+ * Error while reading greeting packet. PID=19615,这是两个进程互抢一个连接句柄引起的
+ *
+ * @param array $config
+ * @return void
+ * @author seatle
+ * @created time :2016-03-29 00:51
+ */
+ public static function clear_link()
+ {
+ if(self::$links)
+ {
+ foreach(self::$links as $k=>$v)
+ {
+ @mysqli_close($v['conn']);
+ unset(self::$links[$k]);
+ }
+ }
+ // 注意,只会连接最后一个,不过貌似也够用了啊
+ self::_init();
+ }
+
+    /**
+     * Switch the active link to the named connection (only needed when several databases are used at once).
+     *
+     * @param string $link_name Identifier of the link to activate
+     * @param array $config Connection settings in the same format as $GLOBALS['config']['db'];
+     *                      only needs to be passed the first time a link name is used
+     * @return void
+     * @throws \Exception When no $config is given and none was stored earlier for $link_name
+     */
+    public static function set_connect($link_name, $config = array())
+    {
+        self::$link_name = $link_name;
+        if (!empty($config))
+        {
+            self::$configs[self::$link_name] = $config;
+        }
+        elseif (empty(self::$configs[self::$link_name]))
+        {
+            // Leading backslash: inside namespace phpspider\core a bare Exception would resolve to phpspider\core\Exception
+            throw new \Exception("You not set a config array for connect!");
+        }
+    }
+
+
+ /**
+ * 还原为默认连接(如果不同时使用多个数据库,不会涉及这个操作)
+ * @parem $config 指定配置(默认使用inc_config.php的配置)
+ * @return void
+ */
+ public static function set_connect_default()
+ {
+ $config = self::_get_default_config();
+ self::set_connect('default', $config);
+ }
+
+
+ /**
+ * 获取默认配置
+ */
+ protected static function _get_default_config()
+ {
+ if (empty(self::$configs['default']))
+ {
+ if (!is_array($GLOBALS['config']['db']))
+ {
+ exit('db.php _get_default_config()' . '没有mysql配置');
+ }
+ self::$configs['default'] = $GLOBALS['config']['db'];
+ }
+ return self::$configs['default'];
+ }
+
+ /**
+ * 返回查询游标
+ * @return rsid
+ */
+ protected static function _get_rsid($rsid = '')
+ {
+ return $rsid == '' ? self::$rsid : $rsid;
+ }
+
+ public static function autocommit($mode = false)
+ {
+ if ( self::$autocommiting )
+ {
+ return true;
+ }
+
+ self::$autocommiting = true;
+
+ self::_init();
+ return mysqli_autocommit(self::$links[self::$link_name]['conn'], $mode);
+ }
+
+ public static function begin_tran()
+ {
+ return self::autocommit(false);
+ }
+
+ public static function commit()
+ {
+ mysqli_commit(self::$links[self::$link_name]['conn']);
+ self::autocommit(true);
+ return true;
+ }
+
+
+ public static function rollback()
+ {
+ mysqli_rollback(self::$links[self::$link_name]['conn']);
+ self::autocommit(true);
+ return true;
+ }
+
+ public static function query($sql)
+ {
+ $sql = trim($sql);
+
+ // 初始化数据库
+ self::_init();
+ self::$rsid = @mysqli_query(self::$links[self::$link_name]['conn'], $sql);
+
+ if (self::$rsid === false)
+ {
+ // 不要每次都ping,浪费流量浪费性能,执行出错了才重新连接
+ $errno = mysqli_errno(self::$links[self::$link_name]['conn']);
+ if ($errno == 2013 || $errno == 2006)
+ {
+ $errmsg = mysqli_error(self::$links[self::$link_name]['conn']);
+ log::add($errmsg, "Error");
+
+ @mysqli_close(self::$links[self::$link_name]['conn']);
+ self::$links[self::$link_name]['conn'] = null;
+ return self::query($sql);
+ }
+
+ $errmsg = "Query SQL: ".$sql;
+ log::add($errmsg, "Warning");
+ $errmsg = "Error SQL: ".mysqli_error(self::$links[self::$link_name]['conn']);
+ log::add($errmsg, "Warning");
+
+ $backtrace = debug_backtrace();
+ array_shift($backtrace);
+ $narr = array('class', 'type', 'function', 'file', 'line');
+ $err = "debug_backtrace:\n";
+ foreach($backtrace as $i => $l)
+ {
+ foreach($narr as $k)
+ {
+ if( !isset($l[$k]) )
+ {
+ $l[$k] = '';
+ }
+ }
+ $err .= "[$i] in function {$l['class']}{$l['type']}{$l['function']} ";
+ if($l['file']) $err .= " in {$l['file']} ";
+ if($l['line']) $err .= " on line {$l['line']} ";
+ $err .= "\n";
+ }
+ log::add($err);
+
+ return false;
+ }
+ else
+ {
+ return self::$rsid;
+ }
+ }
+
+ public static function fetch($rsid = '')
+ {
+ $rsid = self::_get_rsid($rsid);
+ $row = mysqli_fetch_array($rsid, MYSQLI_ASSOC);
+ return $row;
+ }
+
+ public static function get_one($sql)
+ {
+ if (!preg_match("/limit/i", $sql))
+ {
+ $sql = preg_replace("/[,;]$/i", '', trim($sql)) . " limit 1 ";
+ }
+ $rsid = self::query($sql);
+ if ($rsid === false)
+ {
+ return array();
+ }
+ $row = self::fetch($rsid);
+ self::free($rsid);
+ return $row;
+ }
+
+ public static function get_all($sql)
+ {
+ $rsid = self::query($sql);
+ if ($rsid === false)
+ {
+ return array();
+ }
+ while ( $row = self::fetch($rsid) )
+ {
+ $rows[] = $row;
+ }
+ self::free($rsid);
+ return empty($rows) ? false : $rows;
+ }
+
+ public static function free($rsid)
+ {
+ return mysqli_free_result($rsid);
+ }
+
+ public static function insert_id()
+ {
+ return mysqli_insert_id(self::$links[self::$link_name]['conn']);
+ }
+
+ public static function affected_rows()
+ {
+ return mysqli_affected_rows(self::$links[self::$link_name]['conn']);
+ }
+
+ public static function insert($table = '', $data = null, $return_sql = false)
+ {
+ $items_sql = $values_sql = "";
+ foreach ($data as $k => $v)
+ {
+ $v = stripslashes($v);
+ $v = addslashes($v);
+ $items_sql .= "`$k`,";
+ $values_sql .= "\"$v\",";
+ }
+ $sql = "Insert Ignore Into `{$table}` (" . substr($items_sql, 0, -1) . ") Values (" . substr($values_sql, 0, -1) . ")";
+ if ($return_sql)
+ {
+ return $sql;
+ }
+ else
+ {
+ if (self::query($sql))
+ {
+ return mysqli_insert_id(self::$links[self::$link_name]['conn']);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+    /**
+     * Insert several rows with a single "Insert Ignore" statement.
+     * @param string $table Table name
+     * @param array $set List of rows (field => value); column names are taken from the first row, so all rows should carry the same fields
+     * @param bool $return_sql When true, return the SQL string instead of executing it
+     * @return mixed false on empty input, the SQL string, the last insert id, or the query() result when no id is reported
+     */
+    public static function insert_batch($table = '', $set = NULL, $return_sql = FALSE)
+    {
+        if (empty($table) || empty($set))
+        {
+            return false;
+        }
+        $set = self::strsafe($set);
+        $fields = self::get_fields($table);
+
+        $keys_sql = $vals_sql = array();
+        foreach ($set as $val)
+        {
+            ksort($val);
+            $vals = array();
+            foreach ($val as $k => $v)
+            {
+                // Skip fields the table does not have
+                if (!in_array((string) $k, $fields, true)) continue;
+                // Column names come from the first row only: $vals_sql is still empty while that row is processed.
+                // The former `$i == 0 && $k == 0` test depended on "name" == 0 being true, which PHP 8 no longer does.
+                if (empty($vals_sql)) $keys_sql[] = "`$k`";
+                $vals[] = "\"$v\"";
+            }
+            $vals_sql[] = implode(",", $vals);
+        }
+
+        $sql = "Insert Ignore Into `{$table}`(".implode(", ", $keys_sql).") Values (".implode("), (", $vals_sql).")";
+        if ($return_sql) return $sql;
+
+        $rt = self::query($sql);
+        $insert_id = self::insert_id();
+        return empty($insert_id) ? $rt : $insert_id;
+    }
+
+    /**
+     * Update several rows with a single "Update ... Set field = Case ... End" statement.
+     * @param string $table Table name
+     * @param array $set List of rows (field => value); each row must contain the $index field
+     * @param string $index Field used to match rows (the "When `index` = ..." condition)
+     * @param string|array|null $where Extra condition(s), joined with "And"
+     * @param bool $return_sql When true, return the SQL string instead of executing it
+     * @return mixed false on bad input or query failure, the SQL string, the affected row count, or the query() result when no row changed
+     */
+    public static function update_batch($table = '', $set = NULL, $index = NULL, $where = NULL, $return_sql = FALSE)
+    {
+        if (empty($table) || is_null($set) || is_null($index))
+        {
+            // Do not exit() here, that would abort the whole program
+            return false;
+        }
+        $set = self::strsafe($set);
+        $fields = self::get_fields($table);
+
+        $ids = $final = array(); // $final is pre-initialised so an empty $set leaves nothing undefined below
+        foreach ($set as $val)
+        {
+            ksort($val);
+            // De-duplicate by index value (not strictly required: only the first matching When is applied)
+            $key = md5($val[$index]);
+            $ids[$key] = $val[$index];
+
+            foreach (array_keys($val) as $field)
+            {
+                if ($field != $index)
+                {
+                    $final[$field][$key] = 'When `'.$index.'` = "'.$val[$index].'" Then "'.$val[$field].'"';
+                }
+            }
+        }
+
+        // Normalise $where to an array first; appending with [] to a string value is a fatal error
+        $where = empty($where) ? array() : (array) $where;
+        $where[] = $index.' In ("'.implode('","', $ids).'")';
+        $where = " Where ".implode(" And ", $where);
+
+        $cases = '';
+        foreach ($final as $k => $v)
+        {
+            // Skip fields the table does not have
+            if (!in_array((string) $k, $fields, true))
+            {
+                continue;
+            }
+            $cases .= '`'.$k.'` = Case '."\n";
+            foreach ($v as $row)
+            {
+                $cases .= $row."\n";
+            }
+            $cases .= 'Else `'.$k.'` End, ';
+        }
+
+        // Drop the trailing ", " left by the last Case block
+        $sql = "Update `".$table."` Set ".substr($cases, 0, -2).$where;
+        if ($return_sql) return $sql;
+
+        $rt = self::query($sql);
+        // mysqli reports -1 affected rows after a failed query, so bail out before reading the count
+        if ($rt === false) return false;
+        $affected_rows = self::affected_rows();
+        return empty($affected_rows) ? $rt : $affected_rows;
+    }
+
+ public static function update($table = '', $data = array(), $where = null, $return_sql = false)
+ {
+ $sql = "UPDATE `{$table}` SET ";
+ foreach ($data as $k => $v)
+ {
+ $v = stripslashes($v);
+ $v = addslashes($v);
+ $sql .= "`{$k}` = \"{$v}\",";
+ }
+ if (!is_array($where))
+ {
+ $where = array($where);
+ }
+ // 删除空字段,不然array("")会成为WHERE
+ foreach ($where as $k => $v)
+ {
+ if (empty($v))
+ {
+ unset($where[$k]);
+ }
+ }
+ $where = empty($where) ? "" : " Where " . implode(" And ", $where);
+ $sql = substr($sql, 0, -1) . $where;
+ if ($return_sql)
+ {
+ return $sql;
+ }
+ else
+ {
+ if (self::query($sql))
+ {
+ return mysqli_affected_rows(self::$links[self::$link_name]['conn']);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ public static function delete($table = '', $where = null, $return_sql = false)
+ {
+ // 小心全部被删除了
+ if (empty($where))
+ {
+ return false;
+ }
+ $where = 'Where ' . (!is_array($where) ? $where : implode(' And ', $where));
+ $sql = "Delete From `{$table}` {$where}";
+ if ($return_sql)
+ {
+ return $sql;
+ }
+ else
+ {
+ if (self::query($sql))
+ {
+ return mysqli_affected_rows(self::$links[self::$link_name]['conn']);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ public static function ping()
+ {
+ if (!mysqli_ping(self::$links[self::$link_name]['conn']))
+ {
+ @mysqli_close(self::$links[self::$link_name]['conn']);
+ self::$links[self::$link_name]['conn'] = null;
+ self::_init();
+ }
+ }
+
+ public static function strsafe($array)
+ {
+ $arrays = array();
+ if(is_array($array)===true)
+ {
+ foreach ($array as $key => $val)
+ {
+ if(is_array($val)===true)
+ {
+ $arrays[$key] = self::strsafe($val);
+ }
+ else
+ {
+ //先去掉转义,避免下面重复转义了
+ $val = stripslashes($val);
+ //进行转义
+ $val = addslashes($val);
+ //处理addslashes没法处理的 _ % 字符
+ //$val = strtr($val, array('_'=>'\_', '%'=>'\%'));
+ $arrays[$key] = $val;
+ }
+ }
+ return $arrays;
+ }
+ else
+ {
+ $array = stripslashes($array);
+ $array = addslashes($array);
+ //$array = strtr($array, array('_'=>'\_', '%'=>'\%'));
+ return $array;
+ }
+ }
+
+ // 这个是给insert、update、insert_batch、update_batch用的
+ public static function get_fields($table)
+ {
+ // $sql = "SHOW COLUMNS FROM $table"; //和下面的语句效果一样
+ $rows = self::get_all("Desc `{$table}`");
+ $fields = array();
+ foreach ($rows as $k => $v)
+ {
+ // 过滤自增主键
+ // if ($v['Key'] != 'PRI')
+ if ($v['Extra'] != 'auto_increment')
+ {
+ $fields[] = $v['Field'];
+ }
+ }
+ return $fields;
+ }
+
+    // Return true when SHOW TABLES LIKE finds $table_name on the current link; false when nothing matches or the query fails
+    public static function table_exists($table_name)
+    {
+        // NOTE(review): $table_name is interpolated unescaped, and LIKE treats _ and % as wildcards
+        $sql = "SHOW TABLES LIKE '" . $table_name . "'";
+        $rsid = self::query($sql);
+        // query() returns false on failure; fetch() cannot read a row from that
+        if ($rsid === false) return false;
+        $table = self::fetch($rsid);
+        return !empty($table);
+    }
+}
+
+
+
+
+
+
diff --git a/vendor/owner888/phpspider/core/init.php b/vendor/owner888/phpspider/core/init.php
new file mode 100644
index 0000000..7bbb2aa
--- /dev/null
+++ b/vendor/owner888/phpspider/core/init.php
@@ -0,0 +1,101 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider公共入口文件
+//----------------------------------
+
+// 严格开发模式
+error_reporting( E_ALL );
+//ini_set('display_errors', 1);
+
+// 永不超时
+ini_set('max_execution_time', 0);
+set_time_limit(0);
+// 内存限制,如果外面设置的内存比 /etc/php/php-cli.ini 大,就不要设置了
+if (intval(ini_get("memory_limit")) < 1024)
+{
+ ini_set('memory_limit', '1024M');
+}
+
+if( PHP_SAPI != 'cli' )
+{
+ exit("You must run the CLI environment\n");
+}
+
+// 设置时区
+date_default_timezone_set('Asia/Shanghai');
+
+// 引入PATH_DATA
+require_once __DIR__ . '/constants.php';
+// Core library paths. constants.php (required just above) already defines them, so only define what is still missing
+defined('CORE') || define('CORE', dirname(__FILE__));
+defined('PATH_ROOT') || define('PATH_ROOT', CORE."/../");
+defined('PATH_DATA') || define('PATH_DATA', CORE."/../data");
+defined('PATH_LIBRARY') || define('PATH_LIBRARY', CORE."/../library");
+
+// 系统配置
+if( file_exists( PATH_ROOT."/config/inc_config.php" ) )
+{
+ require PATH_ROOT."/config/inc_config.php";
+}
+require CORE.'/log.php';
+require CORE.'/requests.php';
+require CORE.'/selector.php';
+require CORE.'/util.php';
+require CORE.'/db.php';
+require CORE.'/cache.php';
+require CORE."/worker.php";
+require CORE."/phpspider.php";
+
+// 启动的时候生成data目录
+util::path_exists(PATH_DATA);
+util::path_exists(PATH_DATA."/lock");
+util::path_exists(PATH_DATA."/log");
+util::path_exists(PATH_DATA."/cache");
+util::path_exists(PATH_DATA."/status");
+
+function autoload($classname) {
+ set_include_path(PATH_ROOT.'/library/');
+ spl_autoload($classname); //replaces include/require
+}
+
+spl_autoload_extensions('.php');
+spl_autoload_register('autoload');
+
+/**
+ * 自动加载类库处理
+ * @return void
+ */
+//function __autoload( $classname )
+//{
+ //$classname = preg_replace("/[^0-9a-z_]/i", '', $classname);
+ //if( class_exists ( $classname ) ) {
+ //return true;
+ //}
+ //$classfile = $classname.'.php';
+ //try
+ //{
+ //if ( file_exists ( PATH_LIBRARY.'/'.$classfile ) )
+ //{
+ //require PATH_LIBRARY.'/'.$classfile;
+ //}
+ //else
+ //{
+ //throw new Exception ( 'Error: Cannot find the '.$classname );
+ //}
+ //}
+ //catch ( Exception $e )
+ //{
+ //log::error($e->getMessage().'|'.$classname);
+ //exit();
+ //}
+//}
diff --git a/vendor/owner888/phpspider/core/log.php b/vendor/owner888/phpspider/core/log.php
new file mode 100644
index 0000000..b4c4cf0
--- /dev/null
+++ b/vendor/owner888/phpspider/core/log.php
@@ -0,0 +1,119 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider日志类文件
+//----------------------------------
+
+namespace phpspider\core;
+// 引入PATH_DATA
+require_once __DIR__ . '/constants.php';
+
+class log
+{
+ public static $log_show = false;
+ public static $log_type = false;
+ public static $log_file = "data/phpspider.log";
+ public static $out_sta = "";
+ public static $out_end = "";
+
+ public static function note($msg)
+ {
+ self::$out_sta = self::$out_end = "";
+ self::msg($msg, 'note');
+ }
+
+ public static function info($msg)
+ {
+ self::$out_sta = self::$out_end = "";
+ self::msg($msg, 'info');
+ }
+
+ public static function warn($msg)
+ {
+ self::$out_sta = self::$out_end = "";
+ if (!util::is_win())
+ {
+ self::$out_sta = "\033[33m";
+ self::$out_end = "\033[0m";
+ }
+
+ self::msg($msg, 'warn');
+ }
+
+ public static function debug($msg)
+ {
+ self::$out_sta = self::$out_end = "";
+ if (!util::is_win())
+ {
+ self::$out_sta = "\033[36m";
+ self::$out_end = "\033[0m";
+ }
+
+ self::msg($msg, 'debug');
+ }
+
+ public static function error($msg)
+ {
+ self::$out_sta = self::$out_end = "";
+ if (!util::is_win())
+ {
+ self::$out_sta = "\033[31m";
+ self::$out_end = "\033[0m";
+ }
+
+ self::msg($msg, 'error');
+ }
+
+ public static function msg($msg, $log_type)
+ {
+ if ($log_type != 'note' && self::$log_type && strpos(self::$log_type, $log_type) === false)
+ {
+ return false;
+ }
+
+ if ($log_type == 'note')
+ {
+ $msg = self::$out_sta. $msg . "\n".self::$out_end;
+ }
+ else
+ {
+ $msg = self::$out_sta.date("Y-m-d H:i:s")." [{$log_type}] " . $msg .self::$out_end. "\n";
+ }
+ if(self::$log_show)
+ {
+ echo $msg;
+ }
+ file_put_contents(self::$log_file, $msg, FILE_APPEND | LOCK_EX);
+ }
+
+ /**
+ * 记录日志 XXX
+ * @param string $msg
+ * @param string $log_type Note|Warning|Error
+ * @return void
+ */
+ public static function add($msg, $log_type = '')
+ {
+ if ($log_type != '')
+ {
+ $msg = date("Y-m-d H:i:s")." [{$log_type}] " . $msg . "\n";
+ }
+ if(self::$log_show)
+ {
+ echo $msg;
+ }
+ //file_put_contents(PATH_DATA."/log/".strtolower($log_type).".log", $msg, FILE_APPEND | LOCK_EX);
+ file_put_contents(PATH_DATA."/log/error.log", $msg, FILE_APPEND | LOCK_EX);
+ }
+
+}
+
diff --git a/vendor/owner888/phpspider/core/phpspider.bak20170807.php b/vendor/owner888/phpspider/core/phpspider.bak20170807.php
new file mode 100644
index 0000000..e747237
--- /dev/null
+++ b/vendor/owner888/phpspider/core/phpspider.bak20170807.php
@@ -0,0 +1,2870 @@
+<?php
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider core class file
+//----------------------------------
+
+namespace phpspider\core;
+
+require_once __DIR__ . '/constants.php';
+
+use phpspider\core\requests;
+use phpspider\core\selector;
+use phpspider\core\queue;
+use phpspider\core\db;
+use phpspider\core\util;
+use phpspider\core\log;
+use Exception;
+
+// Create the data directories at startup.
+util::path_exists(PATH_DATA);
+util::path_exists(PATH_DATA."/lock");
+util::path_exists(PATH_DATA."/log");
+util::path_exists(PATH_DATA."/cache");
+util::path_exists(PATH_DATA."/status");
+
+class phpspider
+{
+ /**
+ * Version number.
+ * @var string
+ */
+ const VERSION = '3.0.4';
+
+ /**
+ * Default delay between two page fetches, in milliseconds; 0 means no delay.
+ */
+ const INTERVAL = 0;
+
+ /**
+ * Default timeout for fetching a single page, in seconds.
+ */
+ const TIMEOUT = 5;
+
+ /**
+ * Default number of retries after a failed fetch; 0 means a failed page is not fetched again.
+ */
+ const MAX_TRY = 0;
+
+ /**
+ * User-agent strings the spider can present: pc, ios, android.
+ * The default is PC.
+ */
+ const AGENT_PC = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36";
+ const AGENT_IOS = "Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_3 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G34 Safari/601.1";
+ const AGENT_ANDROID = "Mozilla/5.0 (Linux; U; Android 6.0.1;zh_cn; Le X820 Build/FEXCNFN5801507014S) AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 Chrome/49.0.0.0 Mobile Safari/537.36 EUI Browser/5.8.015S";
+
+ /**
+ * Path and name of the pid file (currently disabled).
+ * @var string
+ */
+ //public static $pid_file = '';
+
+ /**
+ * Log directory, by default under the data root directory (currently disabled).
+ * @var mixed
+ */
+ //public static $log_file = '';
+
+ /**
+ * Process ID of the master task (currently disabled).
+ */
+ //public static $master_pid = 0;
+
+ /**
+ * Process IDs of all tasks (currently disabled).
+ */
+ //public static $taskpids = array();
+
+ /**
+ * Daemonize.
+ *
+ * @var bool
+ */
+ public static $daemonize = false;
+
+ /**
+ * Whether the current process has been asked to terminate.
+ */
+ public static $terminate = false;
+
+ /**
+ * Whether the spider runs in distributed (multi-server) mode.
+ */
+ public static $multiserver = false;
+
+ /**
+ * ID of the current server.
+ */
+ public static $serverid = 1;
+
+ /**
+ * Whether this process is the master task process.
+ */
+ public static $taskmaster = true;
+
+ /**
+ * ID of the current task.
+ */
+ public static $taskid = 1;
+
+ /**
+ * Process ID of the current task.
+ */
+ public static $taskpid = 1;
+
+ /**
+ * Number of concurrent tasks.
+ */
+ public static $tasknum = 1;
+
+ /**
+ * Whether the child tasks have been forked; do_collect_page() sets this to
+ * true in the master just before it forks them.
+ */
+ public static $fork_task_complete = false;
+
+ /**
+ * Whether Redis is used.
+ */
+ public static $use_redis = false;
+
+ /**
+ * Whether the running state of the spider is saved.
+ */
+ public static $save_running_state = false;
+
+ /**
+ * Configuration.
+ */
+ public static $configs = array();
+
+ /**
+ * Queue of URLs to crawl
+ md5(url) => array(
+ 'url' => '', // URL to crawl
+ 'url_type' => '', // type of the URL: scan_page, list_page, content_page
+ 'method' => 'get', // "GET" request by default, "POST" is also supported
+ 'headers' => array(), // headers for this url, may be empty
+ 'params' => array(), // parameters to add when sending the request, may be empty
+ 'context_data'=> '', // extra data attached to this url, may be empty
+ 'proxies' => false, // whether to use a proxy
+ 'try_num' => 0 // number of fetch attempts so far
+ 'max_try' => 0 // number of failed fetches allowed
+ )
+ */
+ public static $collect_queue = array();
+
+ /**
+ * Array of URLs to crawl
+ * md5($url) => time()
+ */
+ public static $collect_urls = array();
+
+ /**
+ * Number of URLs to crawl.
+ */
+ public static $collect_urls_num = 0;
+
+ /**
+ * Number of URLs already crawled.
+ */
+ public static $collected_urls_num = 0;
+
+ /**
+ * Number of successful fetches in the current process.
+ */
+ public static $collect_succ = 0;
+
+ /**
+ * Number of failed fetches in the current process.
+ */
+ public static $collect_fail = 0;
+
+ /**
+ * Number of fields extracted.
+ */
+ public static $fields_num = 0;
+
+ /**
+ * Crawl depth.
+ */
+ public static $depth_num = 0;
+
+ /**
+ * Spider start time.
+ */
+ public static $time_start = 0;
+
+ /**
+ * Task status.
+ */
+ public static $task_status = array();
+
+ // Export type configuration
+ public static $export_type = '';
+ public static $export_file = '';
+ public static $export_conf = '';
+ public static $export_table = '';
+
+ // Database configuration
+ public static $db_config = array();
+ // Queue configuration
+ public static $queue_config = array();
+
+ // Column widths of the running status panel
+ public static $server_length = 10;
+ public static $tasknum_length = 8;
+ public static $taskid_length = 8;
+ public static $pid_length = 8;
+ public static $mem_length = 8;
+ public static $urls_length = 15;
+ public static $speed_length = 6;
+
+ /**
+ * Called when the spider initialises; used to perform operations before crawling starts.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_start = null;
+
+ /**
+ * Callback for the HTTP status code of a page.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_status_code = null;
+
+ /**
+ * Decides whether the current page was blocked by anti-crawler measures; must be implemented by the developer.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $is_anti_spider = null;
+
+ /**
+ * Called after a page has been downloaded; mainly used to process the downloaded page.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_download_page = null;
+
+ /**
+ * Called after the page of an attached_url has been downloaded; mainly used to process the downloaded page.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_download_attached_page = null;
+
+ /**
+ * Called for a URL extracted from the current page.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_fetch_url = null;
+
+ /**
+ * The URL is a scan (entry) page.
+ * Called after the content of a scan url has been fetched and before new urls are added to the crawl queue.
+ * Mainly used to discover new urls to crawl and to attach data to them.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_scan_page = null;
+
+ /**
+ * The URL is a list page.
+ * Called after the content of a list page url has been fetched and before new urls are added to the crawl queue.
+ * Mainly used to discover new urls to crawl and to attach data to them.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_list_page = null;
+
+ /**
+ * The URL is a content page.
+ * Called after the content of a content page url has been fetched and before new urls are added to the crawl queue.
+ * Mainly used to discover new urls to crawl and to attach data to them.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_content_page = null;
+
+ /**
+ * Called after a field has been extracted, to process the img tags it contains.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_handle_img = null;
+
+ /**
+ * Callback run once the content of a field has been extracted; it can further process the extracted content.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_extract_field = null;
+
+ /**
+ * Called after all fields of a page have been extracted, when the fields may need further processing, e.g. before publishing to your own site.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_extract_page = null;
+
+ /**
+ * If the fetched page is an attachment file, such as an image, video, binary file, apk, ipad or exe,
+ * its content is not analysed for fields; field extraction only applies to HTML.
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_attachment_file = null;
+
+ /**
+  * Build a spider from a configuration array.
+  *
+  * Configures logging, runs the entry-script marker check, fills in default
+  * values for missing options and copies the relevant options into the
+  * static state of the class.
+  *
+  * @param array $configs spider options; keys that are not set receive defaults
+  */
+ function __construct($configs = array())
+ {
+     // Generate ticks; works around Ctrl+C not stopping the spider on PHP 7.
+     declare(ticks = 1);
+
+     // Turn log display on first so validation errors are visible.
+     log::$log_show = true;
+     log::$log_file = PATH_DATA.'/phpspider.log';
+     if (isset($configs['log_file']))
+     {
+         log::$log_file = $configs['log_file'];
+     }
+     log::$log_type = false;
+     if (isset($configs['log_type']))
+     {
+         log::$log_type = $configs['log_type'];
+     }
+
+     // Easter egg: the first included file must contain both marker comments.
+     $files = get_included_files();
+     $entry_source = file_get_contents($files[0]);
+     if (!(preg_match("#/\* Do NOT delete this comment \*/#", $entry_source) && preg_match("#/\* 不要删除这段注释 \*/#", $entry_source)))
+     {
+         log::error("Unknown error...");
+         exit;
+     }
+
+     // Default values for options the caller did not set.
+     $defaults = array(
+         'name'        => 'phpspider',
+         'proxies'     => '',
+         'user_agent'  => self::AGENT_PC,
+         'user_agents' => null,
+         'client_ip'   => null,
+         'client_ips'  => null,
+         'interval'    => self::INTERVAL,
+         'timeout'     => self::TIMEOUT,
+         'max_try'     => self::MAX_TRY,
+         'max_depth'   => 0,
+         'max_fields'  => 0,
+         'export'      => array(),
+     );
+     foreach ($defaults as $option => $fallback)
+     {
+         if (!isset($configs[$option]))
+         {
+             $configs[$option] = $fallback;
+         }
+     }
+
+     // Export settings: csv, sql, db
+     $export = $configs['export'];
+     self::$export_type  = isset($export['type'])  ? $export['type']  : '';
+     self::$export_file  = isset($export['file'])  ? $export['file']  : '';
+     self::$export_table = isset($export['table']) ? $export['table'] : '';
+     self::$db_config    = isset($configs['db_config'])    ? $configs['db_config']    : array();
+     self::$queue_config = isset($configs['queue_config']) ? $configs['queue_config'] : array();
+
+     // Concurrent tasks only when tasknum is set, greater than 1, and not on Windows.
+     if (isset($configs['tasknum']) && $configs['tasknum'] > 1 && !util::is_win())
+     {
+         self::$tasknum = $configs['tasknum'];
+     }
+
+     // Whether to keep the running state.
+     if (isset($configs['save_running_state']))
+     {
+         self::$save_running_state = $configs['save_running_state'];
+     }
+
+     // Whether to run distributed.
+     if (isset($configs['multiserver']))
+     {
+         self::$multiserver = $configs['multiserver'];
+     }
+
+     // ID of the current server.
+     if (isset($configs['serverid']))
+     {
+         self::$serverid = $configs['serverid'];
+     }
+
+     // Different projects are told apart by a prefix derived from the spider name.
+     if (isset($GLOBALS['config']['redis']['prefix']))
+     {
+         $GLOBALS['config']['redis']['prefix'] .= '-'.md5($configs['name']);
+     }
+
+     self::$configs = $configs;
+ }
+
+ /**
+  * Add an entry URL to the crawl queue.
+  *
+  * The link is queued as 'scan_page' unless the URL matches a list-page or
+  * content-page regex, in which case that type is used instead.
+  *
+  * @param mixed $url
+  * @param mixed $options        extra link fields (method, headers, params, ...)
+  * @param bool  $allowed_repeat passed through to queue_lpush()
+  * @return mixed the result of queue_lpush()
+  */
+ public function add_scan_url($url, $options = array(), $allowed_repeat = true)
+ {
+     $link = $options;
+     $link['url'] = $url;
+     $link['url_type'] = 'scan_page';
+     $link = $this->link_uncompress($link);
+
+     // A more specific page type replaces the scan_page default.
+     if ($this->is_list_page($url))
+     {
+         $link['url_type'] = 'list_page';
+     }
+     elseif ($this->is_content_page($url))
+     {
+         $link['url_type'] = 'content_page';
+     }
+
+     // Delivery status
+     $status = $this->queue_lpush($link, $allowed_repeat);
+
+     if ($status)
+     {
+         switch ($link['url_type'])
+         {
+             case 'scan_page':
+                 log::debug("Find scan page: {$url}");
+                 break;
+             case 'list_page':
+                 log::debug("Find list page: {$url}");
+                 break;
+             case 'content_page':
+                 log::debug("Find content page: {$url}");
+                 break;
+         }
+     }
+
+     return $status;
+ }
+
+ /**
+  * Add a discovered URL to the crawl queue.
+  *
+  * Usually called from the on_scan_page and on_list_page callbacks. Only
+  * URLs matching a list-page or content-page regex are queued; anything
+  * else is ignored. When two processes call this at the same time with the
+  * same URL, the URL can enter the queue twice.
+  *
+  * A URL matching both kinds of regex is queued once, as a list page (the
+  * same precedence add_scan_url() uses). Previously it was pushed a second
+  * time as a content page and the second result replaced the first one.
+  * NOTE(review): queue_lpush() is not visible here; presumably it rejects
+  * repeated URLs by default, which made this method report failure for a
+  * link it had just queued - confirm.
+  *
+  * @param mixed $url
+  * @param mixed $options extra link fields (method, headers, params, ...)
+  * @param int   $depth   depth at which the URL was found
+  * @return mixed false when the URL matched no regex, otherwise the result of queue_lpush()
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function add_url($url, $options = array(), $depth = 0)
+ {
+     // Delivery status
+     $status = false;
+
+     $link = $options;
+     $link['url'] = $url;
+     $link['depth'] = $depth;
+     $link = $this->link_uncompress($link);
+
+     if ($this->is_list_page($url))
+     {
+         $link['url_type'] = 'list_page';
+         $status = $this->queue_lpush($link);
+     }
+     elseif ($this->is_content_page($url))
+     {
+         $link['url_type'] = 'content_page';
+         $status = $this->queue_lpush($link);
+     }
+
+     // $status can only be truthy after one of the two branches above ran,
+     // so url_type is either list_page or content_page here.
+     if ($status)
+     {
+         if ($link['url_type'] == 'list_page')
+         {
+             log::debug("Find list page: {$url}");
+         }
+         else
+         {
+             log::debug("Find content page: {$url}");
+         }
+     }
+
+     return $status;
+ }
+
+ /**
+  * Whether the URL is acceptable as an entry (scan) page, i.e. its host is
+  * one of the configured 'domains'.
+  *
+  * Returns false when the URL has no host or when no 'domains' list is
+  * configured, instead of passing a non-array to in_array().
+  *
+  * @param mixed $url
+  * @return bool
+  * @author seatle
+  * @created time :2016-10-12 19:06
+  */
+ public function is_scan_page($url)
+ {
+     $parse_url = parse_url($url);
+     if (empty($parse_url['host']))
+     {
+         return false;
+     }
+
+     // A missing or malformed 'domains' option matches nothing.
+     if (empty(self::$configs['domains']) || !is_array(self::$configs['domains']))
+     {
+         return false;
+     }
+
+     return in_array($parse_url['host'], self::$configs['domains']);
+ }
+
+ /**
+  * Whether the URL is a list page, i.e. matches one of the configured
+  * 'list_url_regexes' (case-insensitive).
+  *
+  * @param mixed $url
+  * @return bool
+  * @author seatle
+  * @created time :2016-10-12 19:06
+  */
+ public function is_list_page($url)
+ {
+     if (empty(self::$configs['list_url_regexes']))
+     {
+         return false;
+     }
+
+     foreach (self::$configs['list_url_regexes'] as $pattern)
+     {
+         // The first matching pattern decides.
+         if (preg_match("#{$pattern}#i", $url))
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Whether the URL is a content page, i.e. matches one of the configured
+  * 'content_url_regexes' (case-insensitive).
+  *
+  * @param mixed $url
+  * @return bool
+  * @author seatle
+  * @created time :2016-10-12 19:06
+  */
+ public function is_content_page($url)
+ {
+     if (empty(self::$configs['content_url_regexes']))
+     {
+         return false;
+     }
+
+     foreach (self::$configs['content_url_regexes'] as $pattern)
+     {
+         // The first matching pattern decides.
+         if (preg_match("#{$pattern}#i", $url))
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Parse command.
+  * php yourfile.php start | stop | status | kill
+  *
+  * - start: continue starting up; "-d" as second argument enables daemon mode.
+  * - stop:  send SIGINT to the processes whose `ps` line contains the script name, then exit.
+  * - kill:  send SIGKILL to those processes.
+  * - status: exit immediately.
+  * - anything else: print usage and exit.
+  *
+  * The script name is passed through escapeshellarg() before being placed
+  * in the shell pipelines, so names containing spaces or shell
+  * metacharacters no longer break or alter the command.
+  *
+  * @return void
+  */
+ public function parse_command()
+ {
+     // Arguments the script was run with
+     global $argv;
+     $start_file = $argv[0];
+
+     // Command
+     $command = isset($argv[1]) ? trim($argv[1]) : 'start';
+
+     // Sub-command, currently only -d is supported
+     $command2 = isset($argv[2]) ? $argv[2] : '';
+
+     // Pipeline printing the PIDs of the processes matching the script name.
+     $list_pids = "ps aux | grep ".escapeshellarg($start_file)." | grep -v grep | awk '{print $2}'";
+
+     // Act on the command
+     switch($command)
+     {
+         // Start phpspider
+         case 'start':
+             if ($command2 === '-d')
+             {
+                 self::$daemonize = true;
+             }
+             break;
+         case 'stop':
+             exec($list_pids, $info);
+             // At most one matching line is reported as "not run".
+             // NOTE(review): presumably that one line is this very process - confirm.
+             if (count($info) <= 1)
+             {
+                 echo "PHPSpider[$start_file] not run\n";
+             }
+             else
+             {
+                 echo "PHPSpider[$start_file] stop success";
+                 exec($list_pids." |xargs kill -SIGINT", $info);
+             }
+             exit;
+         case 'kill':
+             exec($list_pids." |xargs kill -SIGKILL");
+             break;
+         // Show phpspider running status
+         case 'status':
+             exit(0);
+         // Unknown command
+         default :
+             exit("Usage: php yourfile.php {start|stop|status|kill}\n");
+     }
+ }
+
+ /**
+  * Signal handler.
+  *
+  * SIGINT logs a warning and flags the process for termination; SIGUSR2
+  * prints a status line. Other signals are ignored.
+  *
+  * @param int $signal
+  */
+ public function signal_handler($signal)
+ {
+     if ($signal == SIGINT)
+     {
+         // Stop.
+         log::warn("Program stopping...");
+         self::$terminate = true;
+     }
+     elseif ($signal == SIGUSR2)
+     {
+         // Show status.
+         echo "show status\n";
+     }
+ }
+
+ /**
+  * Install signal handler.
+  *
+  * Does nothing when the pcntl extension is not available. The handler is
+  * bound to this instance: signal_handler() is a non-static method, so
+  * naming it through the class (array(__CLASS__, ...)) is not a valid
+  * callable on PHP 8.
+  *
+  * @return void
+  */
+ public function install_signal()
+ {
+     if (function_exists('pcntl_signal'))
+     {
+         // stop
+         pcntl_signal(SIGINT, array($this, 'signal_handler'), false);
+         // status
+         pcntl_signal(SIGUSR2, array($this, 'signal_handler'), false);
+         // ignore
+         pcntl_signal(SIGPIPE, SIG_IGN, false);
+     }
+ }
+
+ /**
+  * Run as daemon mode.
+  *
+  * No-op unless self::$daemonize is set. Otherwise the process forks,
+  * the parent exits, the child becomes a session leader and forks once
+  * more so that only the grandchild keeps running.
+  *
+  * @throws Exception when a fork or setsid call fails
+  */
+ protected static function daemonize()
+ {
+     if (self::$daemonize)
+     {
+         // The Redis connection must be closed before forking.
+         queue::clear_link();
+
+         umask(0);
+
+         $first_fork = pcntl_fork();
+         if ($first_fork === -1)
+         {
+             throw new Exception('fork fail');
+         }
+         if ($first_fork > 0)
+         {
+             // Parent of the first fork is done.
+             exit(0);
+         }
+
+         if (posix_setsid() === -1)
+         {
+             throw new Exception("setsid fail");
+         }
+
+         // Fork again avoid SVR4 system regain the control of terminal.
+         $second_fork = pcntl_fork();
+         if ($second_fork === -1)
+         {
+             throw new Exception("fork fail");
+         }
+         if ($second_fork !== 0)
+         {
+             exit(0);
+         }
+     }
+ }
+
+ /**
+  * Check whether the current process has to terminate, and terminate it if so.
+  *
+  * Returns false when no termination was requested. Otherwise the status of
+  * the current task is deleted; the master additionally waits until no
+  * child task reports a status any more, removes the server from the server
+  * list and prints the final summary. The process then exits.
+  *
+  * @return bool false when self::$terminate is not set; never returns otherwise
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function check_terminate()
+ {
+     if (!self::$terminate)
+     {
+         return false;
+     }
+
+     // Delete the status of the current task
+     $this->del_task_status(self::$serverid, self::$taskid);
+
+     if (self::$taskmaster)
+     {
+         // Wait until every child task (ids 2..tasknum) has gone.
+         do
+         {
+             $alive = 0;
+             for ($id = 2; $id <= self::$tasknum; $id++)
+             {
+                 // A single live task means shutdown is not complete yet
+                 if ($this->get_task_status(self::$serverid, $id))
+                 {
+                     $alive++;
+                 }
+             }
+             if ($alive > 0)
+             {
+                 log::warn("Task stop waiting...");
+                 sleep(1);
+             }
+         }
+         while ($alive > 0);
+
+         $this->del_server_list(self::$serverid);
+
+         // Show the final result
+         log::$log_show = true;
+
+         $elapsed = util::time2second(intval(microtime(true) - self::$time_start));
+         log::note("Spider finished in {$elapsed}");
+
+         $total_pages = $this->get_collected_url_num();
+         log::note("Total pages: {$total_pages} \n");
+     }
+     exit();
+ }
+
+ /**
+  * Start the spider.
+  *
+  * Parses the command line, validates the environment and configuration,
+  * prints the start banner, seeds the queue with the configured scan_urls,
+  * runs the on_start callback, optionally daemonizes, installs the signal
+  * handlers and then runs the collect loop.
+  *
+  * @return void
+  */
+ public function start()
+ {
+ $this->parse_command();
+
+ // Spider start time
+ self::$time_start = time();
+ // ID of the current task
+ self::$taskid = 1;
+ // Process ID of the current task
+ self::$taskpid = function_exists('posix_getpid') ? posix_getpid() : 1;
+ self::$collect_succ = 0;
+ self::$collect_fail = 0;
+
+ //--------------------------------------------------------------------------------
+ // Validation before running
+ //--------------------------------------------------------------------------------
+
+ // Check the PHP version
+ if (version_compare(PHP_VERSION, '5.3.0', 'lt'))
+ {
+ log::error('PHP 5.3+ is required, currently installed version is: ' . phpversion());
+ exit;
+ }
+
+ // Check the CURL extension
+ if(!function_exists('curl_init'))
+ {
+ log::error("The curl extension was not found");
+ exit;
+ }
+
+ // Multitasking needs the pcntl extension
+ if (self::$tasknum > 1 && !function_exists('pcntl_fork'))
+ {
+ log::error("Multitasking needs pcntl, the pcntl extension was not found");
+ exit;
+ }
+
+ // Daemon mode needs the pcntl extension
+ if (self::$daemonize && !function_exists('pcntl_fork'))
+ {
+ log::error("Daemonize needs pcntl, the pcntl extension was not found");
+ exit;
+ }
+
+ // Clustering, saving the running state and multitasking all need Redis
+ if (self::$multiserver || self::$save_running_state || self::$tasknum > 1)
+ {
+ self::$use_redis = true;
+
+ queue::set_connect('default', self::$queue_config);
+ if (!queue::init())
+ {
+ if (self::$multiserver)
+ {
+ log::error("Multiserver needs Redis support, ".queue::$error);
+ exit;
+ }
+
+ if (self::$tasknum > 1)
+ {
+ log::error("Multitasking needs Redis support, ".queue::$error);
+ exit;
+ }
+
+ if (self::$save_running_state)
+ {
+ log::error("Spider kept running state needs Redis support, ".queue::$error);
+ exit;
+ }
+ }
+ }
+
+ // Check the export settings
+ $this->check_export();
+
+ // Check the cache
+ $this->check_cache();
+
+ // Check scan_urls
+ if (empty(self::$configs['scan_urls']))
+ {
+ log::error("No scan url to start");
+ exit;
+ }
+
+ foreach ( self::$configs['scan_urls'] as $url )
+ {
+ // Only the entry URLs from the configuration are checked; those added through add_scan_url are not.
+ if (!$this->is_scan_page($url))
+ {
+ log::error("Domain of scan_urls (\"{$url}\") does not match the domains of the domain name");
+ exit;
+ }
+ }
+
+ // The panel cannot be displayed on Windows, so log output is forced on
+ if (util::is_win())
+ {
+ self::$configs['name'] = iconv("UTF-8", "GB2312//IGNORE", self::$configs['name']);
+ log::$log_show = true;
+ }
+ else
+ {
+ log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false;
+ }
+
+ // In daemon mode log display is switched on temporarily so the banner below is printed
+ if (self::$daemonize)
+ {
+ log::$log_show = true;
+ }
+
+ if (log::$log_show)
+ {
+ global $argv;
+ $start_file = $argv[0];
+
+ $header = "";
+ if (!util::is_win()) $header .= "\033[33m";
+ $header .= "\n[ ".self::$configs['name']." Spider ] is started...\n\n";
+ $header .= " * PHPSpider Version: ".self::VERSION."\n";
+ $header .= " * Documentation: https://doc.phpspider.org\n";
+ $header .= " * Task Number: ".self::$tasknum."\n\n";
+ $header .= "Input \"php $start_file stop\" to quit. Start success.\n";
+ if (!util::is_win()) $header .= "\033[0m";
+ log::note($header);
+ }
+
+ // In daemon mode, restore the configured log display setting
+ if (self::$daemonize)
+ {
+ log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false;
+ }
+
+ // Multitask and distributed runs both have to be cleared; a distributed run only clears its own data
+ $this->init_redis();
+
+ //--------------------------------------------------------------------------------
+ // Spawn the tasks
+ //--------------------------------------------------------------------------------
+
+ // Add the entry URLs to the queue
+ foreach ( self::$configs['scan_urls'] as $url )
+ {
+ // false means repeats are not allowed
+ $this->add_scan_url($url, null, false);
+ }
+
+ // Placed here so that the callback can add entry pages
+ if ($this->on_start)
+ {
+ call_user_func($this->on_start, $this);
+ }
+
+ if (!self::$daemonize)
+ {
+ if (!log::$log_show)
+ {
+ // Clear the screen the first time
+ $this->clear_echo();
+
+ // Show the panel once now; afterwards it is shown again after each collected page
+ $this->display_ui();
+ }
+ }
+ else
+ {
+ $this->daemonize();
+ }
+
+ // Install the signal handlers
+ $this->install_signal();
+
+ // Start collecting
+ $this->do_collect_page();
+
+ // Remove the current server from the server list
+ $this->del_server_list(self::$serverid);
+ }
+
+ /**
+  * Fork one child task process.
+  *
+  * In the parent this returns right after the fork. The child initialises
+  * its own task state, runs do_collect_page() and exits with status 0.
+  * When the fork fails an error is logged and the process exits.
+  *
+  * @param mixed $taskid id given to the child task
+  * @return void
+  */
+ public function fork_one_task($taskid)
+ {
+     $child_pid = pcntl_fork();
+
+     // Parent: the child pid is not recorded at the moment.
+     if ($child_pid > 0)
+     {
+         return;
+     }
+
+     // Neither parent nor child: the fork failed.
+     if (0 !== $child_pid)
+     {
+         log::error("Fork children task({$taskid}) fail...");
+         exit;
+     }
+
+     // Child process from here on.
+     log::warn("Fork children task({$taskid}) successful...");
+
+     // Initialise the child's task state
+     self::$time_start = microtime(true);
+     self::$taskid = $taskid;
+     self::$taskmaster = false;
+     self::$taskpid = posix_getpid();
+     self::$collect_succ = 0;
+     self::$collect_fail = 0;
+
+     $this->do_collect_page();
+
+     // 0 means a normal exit
+     exit(0);
+ }
+
+ /**
+  * Main collect loop: keeps fetching pages while the queue is not empty.
+  *
+  * The master task forks the child tasks once the queue holds more than
+  * twice the task number, and collects pages itself on every iteration.
+  * A child task only collects while the queue holds more than twice the
+  * task number; otherwise it sleeps for a second. After every iteration
+  * the termination flag is checked.
+  *
+  * @return void
+  */
+ public function do_collect_page()
+ {
+ queue::set_connect('default', self::$queue_config);
+ queue::init();
+ while( $queue_lsize = $this->queue_lsize() )
+ {
+ // Master task
+ if (self::$taskmaster)
+ {
+ // Multitask mode and the child tasks have not been forked yet
+ if (self::$tasknum > 1 && !self::$fork_task_complete)
+ {
+ // Once the queue holds more than twice the task number, fork the child tasks to collect together
+ if ( $queue_lsize > self::$tasknum*2 )
+ {
+ self::$fork_task_complete = true;
+
+ // The redis connection fd must be dropped before forking, otherwise the processes fight over the same redis fd
+ queue::clear_link();
+ // Task ids start at 2; 1 is used by the master process
+ for ($i = 2; $i <= self::$tasknum; $i++)
+ {
+ $this->fork_one_task($i);
+ }
+ }
+ }
+
+ // Fetch a page
+ $this->collect_page();
+ // Save the task status
+ $this->set_task_status();
+
+ // Refresh the panel after every collected page
+ if (!log::$log_show && !self::$daemonize)
+ {
+ $this->display_ui();
+ }
+ }
+ // Child task
+ else
+ {
+ // A child task collects only while the queue holds more than twice the task number, otherwise it waits...
+ if ( $queue_lsize > self::$tasknum*2 )
+ {
+ // Fetch a page
+ $this->collect_page();
+ // Save the task status
+ $this->set_task_status();
+ }
+ else
+ {
+ log::warn("Task(".self::$taskid.") waiting...");
+ sleep(1);
+ }
+ }
+
+ // Check whether the process received a stop signal
+ $this->check_terminate();
+ }
+ }
+
+ /**
+ * Crawl one page: take the next link from the queue, download it, run the
+ * page callbacks, extract further URLs and (for content pages) the fields,
+ * then sleep for the configured interval.
+ *
+ * @return false|null false when the download failed or the is_anti_spider callback reported a block
+ * @author seatle
+ * @created time :2016-09-18 10:17
+ */
+ public function collect_page()
+ {
+ $get_collect_url_num = $this->get_collect_url_num();
+ log::info("Find pages: {$get_collect_url_num} ");
+
+ $queue_lsize = $this->queue_lsize();
+ log::info("Waiting for collect pages: {$queue_lsize} ");
+
+ $get_collected_url_num = $this->get_collected_url_num();
+ log::info("Collected pages: {$get_collected_url_num} ");
+
+ // In multitask mode, print the task id
+ if (self::$tasknum > 1)
+ {
+ log::info("Current task id: ".self::$taskid);
+ }
+
+ // First in, first out
+ $link = $this->queue_rpop();
+ $link = $this->link_uncompress($link);
+ $url = $link['url'];
+
+ // Mark the page as crawled
+ $this->incr_collected_url_num($url);
+
+ // Time at which processing of this page started
+ $page_time_start = microtime(true);
+
+ requests::$input_encoding = null;
+ $html = $this->request_url($url, $link);
+
+ if (!$html)
+ {
+ return false;
+ }
+ // Object describing the page currently being crawled
+ $page = array(
+ 'url' => $url,
+ 'raw' => $html,
+ 'request' => array(
+ 'url' => $url,
+ 'method' => $link['method'],
+ 'headers' => $link['headers'],
+ 'params' => $link['params'],
+ 'context_data' => $link['context_data'],
+ 'try_num' => $link['try_num'],
+ 'max_try' => $link['max_try'],
+ 'depth' => $link['depth'],
+ 'taskid' => self::$taskid,
+ ),
+ );
+ unset($html);
+
+ //--------------------------------------------------------------------------------
+ // Run the callbacks
+ //--------------------------------------------------------------------------------
+
+ // Decide whether the current page was blocked by anti-crawler measures; implemented by the developer
+ if ($this->is_anti_spider)
+ {
+ $is_anti_spider = call_user_func($this->is_anti_spider, $url, $page['raw'], $this);
+ // The callback detected a block and returned true
+ if ($is_anti_spider)
+ {
+ return false;
+ }
+ }
+
+ // Called after a page has been downloaded, mainly to process the downloaded page,
+ // for example to add html tags to the body of the page
+ if ($this->on_download_page)
+ {
+ $return = call_user_func($this->on_download_page, $page, $this);
+ // For callbacks that forget to return the page: keep the current one
+ if (isset($return)) $page = $return;
+ }
+
+ // Whether URLs are extracted from the current page.
+ // A callback returning false means no further URLs are discovered from this page
+ $is_find_url = true;
+ if ($link['url_type'] == 'scan_page')
+ {
+ if ($this->on_scan_page)
+ {
+ $return = call_user_func($this->on_scan_page, $page, $page['raw'], $this);
+ if (isset($return)) $is_find_url = $return;
+ }
+ }
+ elseif ($link['url_type'] == 'list_page')
+ {
+ if ($this->on_list_page)
+ {
+ $return = call_user_func($this->on_list_page, $page, $page['raw'], $this);
+ if (isset($return)) $is_find_url = $return;
+ }
+ }
+ elseif ($link['url_type'] == 'content_page')
+ {
+ if ($this->on_content_page)
+ {
+ $return = call_user_func($this->on_content_page, $page, $page['raw'], $this);
+ if (isset($return)) $is_find_url = $return;
+ }
+ }
+
+ // on_scan_page, on_list_page and on_content_page returning false means no further URLs are discovered from this page
+ if ($is_find_url)
+ {
+ // If the maximum depth has not been reached (0 means unlimited), get the next level of URLs
+ if (self::$configs['max_depth'] == 0 || $link['depth'] < self::$configs['max_depth'])
+ {
+ // Extract the URLs from the HTML page
+ $this->get_urls($page['raw'], $url, $link['depth'] + 1);
+ }
+ }
+
+ // For a content page, extract the fields from the HTML page.
+ // List pages could provide data too (source_type: urlcontext), not implemented
+ if ($link['url_type'] == 'content_page')
+ {
+ $this->get_html_fields($page['raw'], $url, $page);
+ }
+
+ // If the current depth is greater than the cached one, update the cache
+ $this->incr_depth_num($link['depth']);
+
+ // Time spent processing the page
+ $time_run = round(microtime(true) - $page_time_start, 3);
+ log::debug("Success process page {$url} in {$time_run} s");
+
+ $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start));
+ log::info("Spider running in {$spider_time_run}");
+
+ // Interval between page fetches, in milliseconds
+ if (!isset(self::$configs['interval']))
+ {
+ // Sleep 100 milliseconds by default; going too fast would look like a ddos
+ self::$configs['interval'] = 100;
+ }
+ usleep(self::$configs['interval'] * 1000);
+ }
+
+ /**
+  * Download a page and return its content.
+  *
+  * Applies the configured encoding, timeout, user agent(s), client ip(s),
+  * and the proxy and headers of the link, performs the request and runs the
+  * on_status_code callback. A 301/302 response with a redirect_url is
+  * followed by calling this method again for the target, and that result is
+  * returned as it is. Previously the outer call appended context_data a
+  * second time and logged and counted a second success, even when the
+  * redirected request had failed.
+  * NOTE(review): redirects are followed without a depth limit - a redirect
+  * loop would recurse indefinitely.
+  *
+  * On 407 the link is pushed back onto the queue; on 0/502/503/429 it is
+  * pushed back while try_num does not exceed max_try.
+  *
+  * @param mixed $url
+  * @param mixed $link link options (method, headers, params, context_data, proxies, try_num, max_try)
+  * @return mixed the page content (context_data appended), or false on failure
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function request_url($url, $link = array())
+ {
+     $time_start = microtime(true);
+
+     // When an input encoding is configured, do not let requests detect it
+     if (isset(self::$configs['input_encoding']))
+     {
+         requests::$input_encoding = self::$configs['input_encoding'];
+     }
+     // Content that is not utf-8 is converted to utf-8, because xpath only supports utf-8
+     requests::$output_encoding = 'utf-8';
+     requests::set_timeout(self::$configs['timeout']);
+     requests::set_useragent(self::$configs['user_agent']);
+     if (self::$configs['user_agents'])
+     {
+         requests::set_useragents(self::$configs['user_agents']);
+     }
+     if (self::$configs['client_ip'])
+     {
+         requests::set_client_ip(self::$configs['client_ip']);
+     }
+     if (self::$configs['client_ips'])
+     {
+         requests::set_client_ips(self::$configs['client_ips']);
+     }
+
+     // Proxy configured for this link
+     if (!empty($link['proxies']))
+     {
+         requests::set_proxies($link['proxies']);
+         // Switch IP automatically
+         requests::set_header('Proxy-Switch-Ip', 'yes');
+     }
+
+     // HTTP headers configured for this link
+     if (!empty($link['headers']))
+     {
+         foreach ($link['headers'] as $k=>$v)
+         {
+             requests::set_header($k, $v);
+         }
+     }
+
+     $method = empty($link['method']) ? 'get' : strtolower($link['method']);
+     $params = empty($link['params']) ? array() : $link['params'];
+     $html = requests::$method($url, $params);
+     // Data attached to this url (e.g. list page data needed by a content page) is appended to the content
+     if ($html && !empty($link['context_data']))
+     {
+         $html .= $link['context_data'];
+     }
+
+     $http_code = requests::$status_code;
+
+     if ($this->on_status_code)
+     {
+         $return = call_user_func($this->on_status_code, $http_code, $url, $html, $this);
+         if (isset($return))
+         {
+             $html = $return;
+         }
+         if (!$html)
+         {
+             return false;
+         }
+     }
+
+     if ($http_code != 200)
+     {
+         // 301/302: fetch the content of the redirect target instead
+         if ($http_code == 301 || $http_code == 302)
+         {
+             $info = requests::$info;
+             if (!isset($info['redirect_url']))
+             {
+                 return false;
+             }
+
+             requests::$input_encoding = null;
+             // The nested call appends context_data, logs and counts the
+             // outcome itself, so its result is returned unchanged.
+             return $this->request_url($info['redirect_url'], $link);
+         }
+
+         if ($http_code == 407)
+         {
+             // Put the link back at the head of the queue and keep collecting
+             $this->queue_rpush($link);
+             log::error("Failed to download page {$url}");
+             self::$collect_fail++;
+         }
+         elseif (in_array($http_code, array('0','502','503','429')))
+         {
+             // One more attempt made
+             $link['try_num']++;
+             // Still within the allowed number of failed attempts
+             if ( $link['try_num'] <= $link['max_try'] )
+             {
+                 // Put the link back at the head of the queue and keep collecting
+                 $this->queue_rpush($link);
+             }
+             log::error("Failed to download page {$url}, retry({$link['try_num']})");
+         }
+         else
+         {
+             log::error("Failed to download page {$url}");
+             self::$collect_fail++;
+         }
+         log::error("HTTP CODE: {$http_code}");
+         return false;
+     }
+
+     // Time spent downloading the page
+     $time_run = round(microtime(true) - $time_start, 3);
+     log::debug("Success download page {$url} in {$time_run} s");
+     self::$collect_succ++;
+
+     return $html;
+ }
+
+ /**
+  * Extract the URLs from an HTML page and add them to the queue.
+  *
+  * All //a/@href values are collected, quote characters are removed and
+  * "&amp;" is decoded to "&" (the replacement previously mapped "&" to
+  * itself and did nothing). Each URL is then completed through fill_url();
+  * blank URLs and URLs fill_url() rejects are dropped (blank ones used to
+  * stay in the list and were still handed to on_fetch_url / add_url()).
+  * The remaining URLs pass through the on_fetch_url callback, if set, and
+  * are queued with the current page as Referer.
+  *
+  * @param mixed $html        HTML content
+  * @param mixed $collect_url URL of the page, used to complete relative URLs
+  * @param int   $depth       depth assigned to the URLs found
+  * @return false|null false when no usable URL was found
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_urls($html, $collect_url, $depth = 0)
+ {
+     //--------------------------------------------------------------------------------
+     // Find the URLs in the page
+     //--------------------------------------------------------------------------------
+     $urls = selector::select($html, '//a/@href');
+
+     if (empty($urls))
+     {
+         return false;
+     }
+
+     // A page with a single url yields a scalar; wrap it so the loops below work
+     if (!is_array($urls))
+     {
+         $urls = array($urls);
+     }
+
+     foreach ($urls as $key=>$url)
+     {
+         $urls[$key] = str_replace(array("\"", "'", '&amp;'), array("", '', '&'), $url);
+     }
+
+     //--------------------------------------------------------------------------------
+     // Filter and complete the URLs
+     //--------------------------------------------------------------------------------
+     // Remove duplicate URLs
+     $urls = array_unique($urls);
+     foreach ($urls as $k=>$url)
+     {
+         $url = trim($url);
+         $val = empty($url) ? false : $this->fill_url($url, $collect_url);
+         if ($val)
+         {
+             $urls[$k] = $val;
+         }
+         else
+         {
+             unset($urls[$k]);
+         }
+     }
+
+     if (empty($urls))
+     {
+         return false;
+     }
+
+     //--------------------------------------------------------------------------------
+     // Put the URLs found into the queue
+     //--------------------------------------------------------------------------------
+     foreach ($urls as $url)
+     {
+         if ($this->on_fetch_url)
+         {
+             $return = call_user_func($this->on_fetch_url, $url, $this);
+             $url = isset($return) ? $return : $url;
+             unset($return);
+
+             // When on_fetch_url returns false, the URL is not queued
+             if (!$url)
+             {
+                 continue;
+             }
+         }
+
+         // Use the current page as the Referer of the URL found
+         $options = array(
+             'headers' => array(
+                 'Referer' => $collect_url,
+             )
+         );
+         $this->add_url($url, $options, $depth);
+     }
+ }
+
+ /**
+ * 获得完整的连接地址
+ *
+ * @param mixed $url 要检查的URL
+ * @param mixed $collect_url 从那个URL页面得到上面的URL
+ * @return void
+ * @author seatle
+ * @created time :2016-09-23 17:13
+ */
+ public function fill_url($url, $collect_url)
+ {
+ $url = trim($url);
+ $collect_url = trim($collect_url);
+
+ // 排除JavaScript的连接
+ //if (strpos($url, "javascript:") !== false)
+ if( preg_match("@^(javascript:|#|'|\")@i", $url) || $url == '')
+ {
+ return false;
+ }
+ // 排除没有被解析成功的语言标签
+ if(substr($url, 0, 3) == '<%=')
+ {
+ return false;
+ }
+
+ $parse_url = @parse_url($collect_url);
+ if (empty($parse_url['scheme']) || empty($parse_url['host']))
+ {
+ return false;
+ }
+ // 过滤mailto、tel、sms、wechat、sinaweibo、weixin等协议
+ if (!in_array($parse_url['scheme'], array("http", "https")))
+ {
+ return false;
+ }
+ $scheme = $parse_url['scheme'];
+ $domain = $parse_url['host'];
+ $path = empty($parse_url['path']) ? '' : $parse_url['path'];
+ $base_url_path = $domain.$path;
+ $base_url_path = preg_replace("/\/([^\/]*)\.(.*)$/","/",$base_url_path);
+ $base_url_path = preg_replace("/\/$/",'',$base_url_path);
+
+ $i = $path_step = 0;
+ $dstr = $pstr = '';
+ $pos = strpos($url,'#');
+ if($pos > 0)
+ {
+ // 去掉#和后面的字符串
+ $url = substr($url, 0, $pos);
+ }
+
+ // 京东变态的都是 //www.jd.com/111.html
+ if(substr($url, 0, 2) == '//')
+ {
+ $url = str_replace("//", "", $url);
+ }
+ // /1234.html
+ elseif($url[0] == '/')
+ {
+ $url = $domain.$url;
+ }
+ // ./1234.html、../1234.html 这种类型的
+ elseif($url[0] == '.')
+ {
+ if(!isset($url[2]))
+ {
+ return false;
+ }
+ else
+ {
+ $urls = explode('/',$url);
+ foreach($urls as $u)
+ {
+ if( $u == '..' )
+ {
+ $path_step++;
+ }
+ // 遇到 ., 不知道为什么不直接写$u == '.', 貌似一样的
+ else if( $i < count($urls)-1 )
+ {
+ $dstr .= $urls[$i].'/';
+ }
+ else
+ {
+ $dstr .= $urls[$i];
+ }
+ $i++;
+ }
+ $urls = explode('/',$base_url_path);
+ if(count($urls) <= $path_step)
+ {
+ return false;
+ }
+ else
+ {
+ $pstr = '';
+ for($i=0;$i
+ * @created time :2016-11-05 18:58
+ */
+ /**
+  * Compress a link array by removing the entries that carry no information:
+  * every optional field whose value is empty(), and a method that is empty
+  * or "get" (compared case-insensitively).
+  *
+  * @param mixed $link
+  * @return mixed the link without the removed entries
+  */
+ public function link_compress($link)
+ {
+     // Optional fields, dropped whenever their value is empty().
+     $optional = array('url_type', 'headers', 'params', 'context_data', 'proxies', 'try_num', 'max_try', 'depth');
+     foreach ($optional as $field)
+     {
+         if (empty($link[$field]))
+         {
+             unset($link[$field]);
+         }
+     }
+
+     // "get" is what link_uncompress() restores, so it need not be kept.
+     $method = empty($link['method']) ? 'get' : strtolower($link['method']);
+     if ($method == 'get')
+     {
+         unset($link['method']);
+     }
+
+     return $link;
+ }
+
+ /**
+  * Expand a link descriptor to its full form, filling every missing option with a default.
+  *
+  * Missing proxies and max_try are taken from self::$configs; the other defaults are literals.
+  *
+  * @param mixed $link link descriptor array, possibly missing options
+  * @return array descriptor with the keys url, url_type, method, headers, params,
+  *               context_data, proxies, try_num, max_try and depth
+  * @author seatle
+  * @created time :2016-11-05 18:58
+  */
+ public function link_uncompress($link)
+ {
+     $full = array();
+     $full['url']          = isset($link['url']) ? $link['url'] : '';
+     $full['url_type']     = isset($link['url_type']) ? $link['url_type'] : '';
+     $full['method']       = isset($link['method']) ? $link['method'] : 'get';
+     $full['headers']      = isset($link['headers']) ? $link['headers'] : array();
+     $full['params']       = isset($link['params']) ? $link['params'] : array();
+     $full['context_data'] = isset($link['context_data']) ? $link['context_data'] : '';
+     // The configuration is only consulted when the link itself carries no value.
+     $full['proxies']      = isset($link['proxies']) ? $link['proxies'] : self::$configs['proxies'];
+     $full['try_num']      = isset($link['try_num']) ? $link['try_num'] : 0;
+     $full['max_try']      = isset($link['max_try']) ? $link['max_try'] : self::$configs['max_try'];
+     $full['depth']        = isset($link['depth']) ? $link['depth'] : 0;
+
+     return $full;
+ }
+
+ /**
+  * Extract the configured fields from a page, run the on_extract_page callback,
+  * log the record and hand it to the configured export target.
+  *
+  * Terminates the process with exit(0) once the number of extracted records
+  * exceeds a non-zero self::$configs['max_fields'].
+  *
+  * @param mixed $html page HTML passed to get_fields()
+  * @param mixed $url  page URL passed to get_fields()
+  * @param mixed $page page descriptor passed to get_fields() and on_extract_page
+  * @return void
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_html_fields($html, $url, $page)
+ {
+     $fields = $this->get_fields(self::$configs['fields'], $html, $url, $page);
+     if (empty($fields))
+     {
+         return;
+     }
+
+     // Only an array returned by the user callback replaces the extracted record.
+     if ($this->on_extract_page)
+     {
+         $callback_result = call_user_func($this->on_extract_page, $page, $fields);
+         if (is_array($callback_result))
+         {
+             $fields = $callback_result;
+         }
+         elseif (!isset($callback_result))
+         {
+             log::warn("on_extract_page return value can't be empty");
+         }
+         else
+         {
+             log::warn("on_extract_page return value must be an array");
+         }
+     }
+
+     if (!isset($fields) || !is_array($fields))
+     {
+         return;
+     }
+
+     $fields_num = $this->incr_fields_num();
+     $max_fields = self::$configs['max_fields'];
+     if ($max_fields != 0 && $fields_num > $max_fields)
+     {
+         exit(0);
+     }
+
+     // Keep non-ASCII characters readable in the log line.
+     if (version_compare(PHP_VERSION,'5.4.0','>='))
+     {
+         $fields_str = json_encode($fields, JSON_UNESCAPED_UNICODE);
+     }
+     else
+     {
+         // Before PHP 5.4 there is no JSON_UNESCAPED_UNICODE: decode the \uXXXX escapes by hand.
+         $fields_str = preg_replace_callback( "#\\\u([0-9a-f]{4})#i", function($matchs) {
+             return iconv('UCS-2BE', 'UTF-8', pack('H4', $matchs[1]));
+         }, json_encode($fields) );
+     }
+
+     if (util::is_win())
+     {
+         $fields_str = mb_convert_encoding($fields_str, 'gb2312', 'utf-8');
+     }
+     log::info("Result[{$fields_num}]: ".$fields_str);
+
+     // Nothing more to do unless an export target is configured.
+     if (empty(self::$configs['export']))
+     {
+         return;
+     }
+
+     self::$export_type = isset(self::$configs['export']['type']) ? self::$configs['export']['type'] : '';
+     switch (self::$export_type)
+     {
+         case 'csv':
+             util::put_file(self::$export_file, util::format_csv($fields)."\n", FILE_APPEND);
+             break;
+         case 'sql':
+             $sql = db::insert(self::$export_table, $fields, true);
+             util::put_file(self::$export_file, $sql.";\n", FILE_APPEND);
+             break;
+         case 'db':
+             db::insert(self::$export_table, $fields);
+             break;
+     }
+ }
+
+ /**
+  * Extract the fields described by $confs from an HTML fragment.
+  *
+  * For every configuration entry this reads: name (mandatory, the process exits
+  * without it), selector, selector_type (xpath when absent, otherwise css or regex),
+  * repeated, required, children (extracted recursively from each matched fragment)
+  * and source_type/attached_url (download another page named by an earlier field
+  * and extract from that page instead).
+  *
+  * After extraction every field goes through the on_handle_img callback (only when
+  * the value is a string containing an <img> tag) and the on_extract_field callback.
+  *
+  * @param mixed $confs list of field configurations
+  * @param mixed $html  HTML to extract from
+  * @param mixed $url   URL of the page; base for resolving attached URLs
+  * @param mixed $page  page descriptor forwarded to on_extract_field
+  * @return array map of field name => value; empty when a required field has no value
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_fields($confs, $html, $url, $page)
+ {
+     $fields = array();
+     foreach ($confs as $conf)
+     {
+         // Whether the field may hold several values.
+         $repeated = isset($conf['repeated']) && $conf['repeated'] ? true : false;
+         // Whether the field must have a value.
+         $required = isset($conf['required']) && $conf['required'] ? true : false;
+
+         if (empty($conf['name']))
+         {
+             log::error("The field name is null, please check your \"fields\" and add the name of the field\n");
+             exit;
+         }
+
+         $values = array();
+         // Only fields with an extraction rule are evaluated.
+         if (!empty($conf['selector']))
+         {
+             // The field lives on a page whose URL was extracted by an earlier field.
+             if (isset($conf['source_type']) && $conf['source_type']=='attached_url')
+             {
+                 // Attached pages are downloaded right away instead of being queued.
+                 if (!empty($fields[$conf['attached_url']]))
+                 {
+                     $collect_url = $this->fill_url($fields[$conf['attached_url']], $url);
+                     $link = $this->link_uncompress(array('url' => $collect_url));
+                     requests::$input_encoding = null;
+                     $html = $this->request_url($collect_url, $link);
+                     // Give the user a chance to post-process the downloaded attached page.
+                     if ($this->on_download_attached_page)
+                     {
+                         $return = call_user_func($this->on_download_attached_page, $html, $this);
+                         if (isset($return))
+                         {
+                             $html = $return;
+                         }
+                     }
+
+                     // The URL field has served its purpose once the page is fetched.
+                     unset($fields[$conf['attached_url']]);
+                 }
+             }
+
+             // No selector type means xpath.
+             if (!isset($conf['selector_type']) || $conf['selector_type']=='xpath')
+             {
+                 $values = $this->get_fields_xpath($html, $conf['selector'], $conf['name']);
+             }
+             elseif ($conf['selector_type']=='css')
+             {
+                 $values = $this->get_fields_css($html, $conf['selector'], $conf['name']);
+             }
+             elseif ($conf['selector_type']=='regex')
+             {
+                 $values = $this->get_fields_regex($html, $conf['selector'], $conf['name']);
+             }
+
+             // Child configurations are applied to every fragment the parent matched.
+             if (!empty($values) && !empty($conf['children']))
+             {
+                 $child_values = array();
+                 foreach ($values as $child_html)
+                 {
+                     // Recursion supports any nesting depth.
+                     $child_value = $this->get_fields($conf['children'], $child_html, $url, $page);
+                     if (!empty($child_value))
+                     {
+                         $child_values[] = $child_value;
+                     }
+                 }
+                 // Keep the raw HTML fragments when no child produced a value.
+                 if (!empty($child_values))
+                 {
+                     $values = $child_values;
+                 }
+             }
+         }
+
+         if (empty($values))
+         {
+             // A missing required field invalidates the whole record.
+             if ($required)
+             {
+                 $fields = array();
+                 break;
+             }
+             // Store a string, not an array, so a later attached_url lookup never mixes string and array.
+             $fields[$conf['name']] = '';
+         }
+         elseif (is_array($values))
+         {
+             // Non-repeated fields keep only the first match.
+             $fields[$conf['name']] = $repeated ? $values : $values[0];
+         }
+         else
+         {
+             $fields[$conf['name']] = $values;
+         }
+     }
+
+     if (!empty($fields))
+     {
+         // Detects an <img ... src=...> tag. The pattern in this file had been reduced to
+         // "/ \r\n\t]{1,}/isU" by tag stripping; this is the upstream phpspider pattern.
+         $pattern = "/<img.*src=[\"']{0,1}(.*)[\"']{0,1}[> \r\n\t]{1,}/isU";
+         foreach ($fields as $fieldname => $data)
+         {
+             // Repeated and child fields are arrays; preg_match() only accepts strings.
+             if ($this->on_handle_img && is_string($data) && preg_match($pattern, $data))
+             {
+                 $return = call_user_func($this->on_handle_img, $fieldname, $data);
+                 if (!isset($return))
+                 {
+                     log::warn("on_handle_img return value can't be empty\n");
+                 }
+                 else
+                 {
+                     // Persist the result so it survives when on_extract_field is absent or returns nothing.
+                     $data = $return;
+                     $fields[$fieldname] = $return;
+                 }
+             }
+
+             // Last hook: lets the user rewrite the extracted value.
+             if ($this->on_extract_field)
+             {
+                 $return = call_user_func($this->on_extract_field, $fieldname, $data, $page);
+                 if (!isset($return))
+                 {
+                     log::warn("on_extract_field return value can't be empty\n");
+                 }
+                 else
+                 {
+                     $fields[$fieldname] = $return;
+                 }
+             }
+         }
+     }
+
+     return $fields;
+ }
+
+ /**
+  * Validate the export settings and terminate the process when they are unusable.
+  *
+  * - csv / sql: self::$export_file must be set.
+  * - db: the mysqli extension must be loaded, self::$db_config must be set, a test
+  *   connection must succeed and self::$export_table must exist.
+  *
+  * Does nothing when self::$configs['export'] is empty.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2016-10-02 23:37
+  */
+ public function check_export()
+ {
+     if (empty(self::$configs['export']))
+     {
+         return;
+     }
+
+     if (self::$export_type == 'csv')
+     {
+         if (empty(self::$export_file))
+         {
+             log::error("Export data into CSV files need to Set the file path.");
+             exit;
+         }
+     }
+     elseif (self::$export_type == 'sql')
+     {
+         if (empty(self::$export_file))
+         {
+             log::error("Export data into SQL files need to Set the file path.");
+             exit;
+         }
+     }
+     elseif (self::$export_type == 'db')
+     {
+         if (!function_exists('mysqli_connect'))
+         {
+             log::error("Export data to a database need Mysql support, Error: Unable to load mysqli extension.");
+             exit;
+         }
+
+         if (empty(self::$db_config))
+         {
+             log::error("Export data to a database need Mysql support, Error: You not set a config array for connect.");
+             exit;
+         }
+
+         $config = self::$db_config;
+         // Probe connection: only proves that the credentials work.
+         $probe = @mysqli_connect($config['host'], $config['user'], $config['pass'], $config['name'], $config['port']);
+         if (!$probe || mysqli_connect_errno())
+         {
+             log::error("Export data to a database need Mysql support, Error: ".mysqli_connect_error());
+             exit;
+         }
+         // The probe handle is not used afterwards, so close it instead of leaking the connection.
+         mysqli_close($probe);
+
+         db::set_connect('default', $config);
+         db::_init();
+
+         if (!db::table_exists(self::$export_table))
+         {
+             log::error("Table ".self::$export_table." does not exist");
+             exit;
+         }
+     }
+ }
+
+ /**
+  * When Redis still holds keys, ask on the terminal whether to continue with them.
+  *
+  * Answering "n" deletes every key returned by queue::keys("*"); any other answer
+  * (including an empty one) keeps the data.
+  *
+  * @return mixed false when Redis is not used or self::$save_running_state is set, otherwise nothing
+  */
+ public function check_cache()
+ {
+     if (!self::$use_redis || self::$save_running_state)
+     {
+         return false;
+     }
+
+     $keys = queue::keys("*");
+     if (count($keys) == 0)
+     {
+         return;
+     }
+
+     $msg = "Found that the data of Redis, no continue will empty Redis data start again\n"
+          . "Do you want to continue? [Y/n]";
+     fwrite(STDOUT, $msg);
+
+     // Only an explicit "n" (any letter case) wipes the stored data.
+     $answer = strtolower(trim(fgets(STDIN)));
+     if ($answer !== 'n')
+     {
+         return;
+     }
+
+     $prefix = $GLOBALS['config']['redis']['prefix'].":";
+     foreach ($keys as $key)
+     {
+         // Remove the configured prefix text from the key before deleting it.
+         queue::del(str_replace($prefix, "", $key));
+     }
+ }
+
+ /**
+  * Register this server in the shared server list and clear its task status entries.
+  *
+  * @return mixed false when Redis is not used, otherwise nothing
+  */
+ public function init_redis()
+ {
+     if (!self::$use_redis)
+     {
+         return false;
+     }
+
+     // Add the current server to the server list.
+     $this->add_server_list(self::$serverid, self::$tasknum);
+
+     // Drop status entries of this server's tasks; useful after a process was killed.
+     $taskid = 1;
+     while ($taskid <= self::$tasknum)
+     {
+         $this->del_task_status(self::$serverid, $taskid);
+         $taskid++;
+     }
+ }
+
+ /**
+  * Publish the status of the current task: id, pid, memory in MB, success and
+  * failure counters and pages per second.
+  *
+  * The JSON-encoded status is stored in Redis under
+  * "server-<serverid>-task_status-<taskid>", or in self::$task_status without Redis.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public function set_task_status()
+ {
+     $mem_mb  = round(memory_get_usage(true)/(1024*1024),2);
+     $elapsed = microtime(true) - self::$time_start;
+     $handled = self::$collect_succ + self::$collect_fail;
+
+     $task_status = json_encode(array(
+         'id' => self::$taskid,
+         'pid' => self::$taskpid,
+         'mem' => $mem_mb,
+         'collect_succ' => self::$collect_succ,
+         'collect_fail' => self::$collect_fail,
+         'speed' => round($handled / $elapsed, 2),
+     ));
+
+     if (!self::$use_redis)
+     {
+         self::$task_status = array($task_status);
+         return;
+     }
+
+     queue::set("server-".self::$serverid."-task_status-".self::$taskid, $task_status);
+ }
+
+ /**
+  * Delete the stored status of one task.
+  *
+  * @param mixed $serverid server the task belongs to
+  * @param mixed $taskid   task id
+  * @return mixed false when Redis is not used, otherwise nothing
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function del_task_status($serverid, $taskid)
+ {
+     if (self::$use_redis)
+     {
+         queue::del('server-'.$serverid.'-task_status-'.$taskid);
+         return;
+     }
+     return false;
+ }
+
+ /**
+  * Read the stored status of one task (per the original note, only the master process calls this).
+  *
+  * @param mixed $serverid server the task belongs to
+  * @param mixed $taskid   task id
+  * @return mixed false when Redis is not used, otherwise whatever queue::get() returns for the status key
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public function get_task_status($serverid, $taskid)
+ {
+     if (self::$use_redis)
+     {
+         return queue::get('server-'.$serverid.'-task_status-'.$taskid);
+     }
+     return false;
+ }
+
+ /**
+  * Collect the stored status of tasks 1..$tasknum of a server (per the original
+  * note, only the master process calls this).
+  *
+  * Without Redis the in-process self::$task_status list is returned and both
+  * arguments are ignored.
+  *
+  * The former "$serverid = 1" default was removed: an optional parameter placed
+  * before a required one could never be omitted and is deprecated since PHP 8.0.
+  *
+  * @param mixed $serverid server id
+  * @param mixed $tasknum  number of tasks running on that server
+  * @return array one entry per task as returned by queue::get(), or self::$task_status
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public function get_task_status_list($serverid, $tasknum)
+ {
+     if (!self::$use_redis)
+     {
+         return self::$task_status;
+     }
+
+     $task_status = array();
+     for ($i = 1; $i <= $tasknum; $i++)
+     {
+         $key = "server-{$serverid}-task_status-".$i;
+         $task_status[] = queue::get($key);
+     }
+     return $task_status;
+ }
+
+ /**
+  * Add the given server to the "server_list" entry kept in Redis, or replace it if already listed.
+  *
+  * @param mixed $serverid server id, used as the key inside the list
+  * @param mixed $tasknum  number of tasks running on that server
+  * @return mixed false when Redis is not used, otherwise nothing
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function add_server_list($serverid, $tasknum)
+ {
+     if (!self::$use_redis)
+     {
+         return false;
+     }
+
+     $stored = queue::get("server_list");
+     $server_list = $stored ? json_decode($stored, true) : array();
+
+     $server_list[$serverid] = array(
+         'serverid' => $serverid,
+         'tasknum' => $tasknum,
+         'time' => time(),
+     );
+
+     // An existing list is kept ordered by server id.
+     if ($stored)
+     {
+         ksort($server_list);
+     }
+     queue::set("server_list", json_encode($server_list));
+ }
+
+ /**
+  * Remove the given server from the "server_list" entry kept in Redis.
+  *
+  * The entry is rewritten only when other servers remain; an emptied list is not written back.
+  *
+  * @param mixed $serverid server id to remove
+  * @return mixed false when Redis is not used, otherwise nothing
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function del_server_list($serverid)
+ {
+     if (!self::$use_redis)
+     {
+         return false;
+     }
+
+     $stored = queue::get("server_list");
+     if (!$stored)
+     {
+         return;
+     }
+
+     $server_list = json_decode($stored, true);
+     if (isset($server_list[$serverid]))
+     {
+         unset($server_list[$serverid]);
+     }
+
+     // Update Redis only while the list still has entries.
+     if (empty($server_list))
+     {
+         return;
+     }
+     ksort($server_list);
+     queue::set("server_list", json_encode($server_list));
+ }
+
+ /**
+  * Number of pages discovered for crawling.
+  *
+  * @return mixed the "collect_urls_num" value from Redis, or self::$collect_urls_num without Redis
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_collect_url_num()
+ {
+     return self::$use_redis ? queue::get("collect_urls_num") : self::$collect_urls_num;
+ }
+
+ /**
+  * Number of pages already crawled.
+  *
+  * @return mixed the "collected_urls_num" value from Redis, or self::$collected_urls_num without Redis
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_collected_url_num()
+ {
+     return self::$use_redis ? queue::get("collected_urls_num") : self::$collected_urls_num;
+ }
+
+ /**
+  * Increase the crawled-page counter by one.
+  *
+  * @param mixed $url not used by this method
+  * @return void
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function incr_collected_url_num($url)
+ {
+     if (!self::$use_redis)
+     {
+         self::$collected_urls_num++;
+         return;
+     }
+     queue::incr("collected_urls_num");
+ }
+
+ /**
+  * Push a link onto the left end of the crawl queue.
+  *
+  * The link is compressed with link_compress() before it is stored. A URL that was
+  * queued before is skipped unless $allowed_repeat is true. The in-memory branch
+  * used to ignore that flag; it now follows the same rule as the Redis branch.
+  *
+  * @param array $link           link descriptor; must contain a non-empty 'url'
+  * @param bool  $allowed_repeat queue the URL even if it was queued before
+  * @return bool true when the link was queued, false otherwise
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_lpush($link = array(), $allowed_repeat = false)
+ {
+     if (empty($link) || empty($link['url']))
+     {
+         return false;
+     }
+
+     $url = $link['url'];
+     $link = $this->link_compress($link);
+
+     $status = false;
+     if (self::$use_redis)
+     {
+         $key = "collect_urls-".md5($url);
+         $lock = "lock-".$key;
+         // Take a per-URL lock so processes handle the same URL one at a time.
+         if (queue::lock($lock))
+         {
+             $exists = queue::exists($key);
+             // Queue when the URL is new or repeats are allowed.
+             if (!$exists || $allowed_repeat)
+             {
+                 // One more page waiting to be crawled.
+                 queue::incr("collect_urls_num");
+                 // Mark the URL as seen.
+                 queue::set($key, time());
+                 // Enqueue.
+                 $link = json_encode($link);
+                 queue::lpush("collect_queue", $link);
+                 $status = true;
+             }
+             queue::unlock($lock);
+         }
+     }
+     else
+     {
+         $key = md5($url);
+         // Same rule as the Redis branch: new URL, or repeats explicitly allowed.
+         if ($allowed_repeat || !array_key_exists($key, self::$collect_urls))
+         {
+             self::$collect_urls_num++;
+             self::$collect_urls[$key] = time();
+             // queue_lpop() takes from the same end with array_pop().
+             array_push(self::$collect_queue, $link);
+             $status = true;
+         }
+     }
+     return $status;
+ }
+
+ /**
+  * Push a link onto the right end of the crawl queue.
+  *
+  * A URL that was queued before is skipped unless $allowed_repeat is true. The
+  * in-memory branch used to ignore that flag; it now follows the same rule as the
+  * Redis branch.
+  *
+  * @param array $link           link descriptor; must contain a non-empty 'url'
+  * @param bool  $allowed_repeat queue the URL even if it was queued before
+  * @return bool true when the link was queued, false otherwise
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_rpush($link = array(), $allowed_repeat = false)
+ {
+     if (empty($link) || empty($link['url']))
+     {
+         return false;
+     }
+
+     $url = $link['url'];
+
+     $status = false;
+     if (self::$use_redis)
+     {
+         $key = "collect_urls-".md5($url);
+         $lock = "lock-".$key;
+         // Take a per-URL lock so processes handle the same URL one at a time.
+         if (queue::lock($lock))
+         {
+             $exists = queue::exists($key);
+             // Queue when the URL is new or repeats are allowed.
+             if (!$exists || $allowed_repeat)
+             {
+                 // One more page waiting to be crawled.
+                 queue::incr("collect_urls_num");
+                 // Mark the URL as seen.
+                 queue::set($key, time());
+                 // Enqueue.
+                 $link = json_encode($link);
+                 queue::rpush("collect_queue", $link);
+                 $status = true;
+             }
+             queue::unlock($lock);
+         }
+     }
+     else
+     {
+         $key = md5($url);
+         // Same rule as the Redis branch: new URL, or repeats explicitly allowed.
+         if ($allowed_repeat || !array_key_exists($key, self::$collect_urls))
+         {
+             self::$collect_urls_num++;
+             self::$collect_urls[$key] = time();
+             // queue_rpop() takes from the same end with array_shift().
+             array_unshift(self::$collect_queue, $link);
+             $status = true;
+         }
+     }
+     return $status;
+ }
+
+ /**
+  * Take a link from the left end of the crawl queue (last in, first out).
+  *
+  * Original author's note: this avoids incomplete data when a paginated content
+  * page fails, and lets list pages be crawled in order.
+  *
+  * @return mixed the JSON-decoded Redis entry, or the array_pop() result of the in-memory queue
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_lpop()
+ {
+     if (!self::$use_redis)
+     {
+         return array_pop(self::$collect_queue);
+     }
+     return json_decode(queue::lpop("collect_queue"), true);
+ }
+
+ /**
+  * Take a link from the right end of the crawl queue.
+  *
+  * @return mixed the JSON-decoded Redis entry, or the array_shift() result of the in-memory queue
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_rpop()
+ {
+     if (!self::$use_redis)
+     {
+         return array_shift(self::$collect_queue);
+     }
+     return json_decode(queue::rpop("collect_queue"), true);
+ }
+
+ /**
+  * Length of the crawl queue.
+  *
+  * @return mixed queue::lsize("collect_queue") with Redis, otherwise the size of the in-memory queue
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_lsize()
+ {
+     return self::$use_redis ? queue::lsize("collect_queue") : count(self::$collect_queue);
+ }
+
+ /**
+  * Record $depth as the crawl depth when it is greater than the stored depth.
+  *
+  * @param mixed $depth depth of the page being processed
+  * @return void
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function incr_depth_num($depth)
+ {
+     if (!self::$use_redis)
+     {
+         if (self::$depth_num < $depth)
+         {
+             self::$depth_num = $depth;
+         }
+         return;
+     }
+
+     $lock = "lock-depth_num";
+     // Lock requested with the arguments time() and 2 (2 seconds, per the original note).
+     if (!queue::lock($lock, time(), 2))
+     {
+         return;
+     }
+
+     if (queue::get("depth_num") < $depth)
+     {
+         queue::set("depth_num", $depth);
+     }
+     queue::unlock($lock);
+ }
+
+ /**
+  * Current crawl depth.
+  *
+  * @return mixed the "depth_num" value from Redis (0 when it is falsy), or self::$depth_num without Redis
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_depth_num()
+ {
+     if (!self::$use_redis)
+     {
+         return self::$depth_num;
+     }
+
+     $stored = queue::get("depth_num");
+     if ($stored)
+     {
+         return $stored;
+     }
+     return 0;
+ }
+
+ /**
+  * Increase the number of extracted records by one.
+  *
+  * @return mixed the counter after the increment (the queue::incr() result when Redis is used)
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function incr_fields_num()
+ {
+     if (self::$use_redis)
+     {
+         return queue::incr("fields_num");
+     }
+     return ++self::$fields_num;
+ }
+
+ /**
+  * Number of extracted records.
+  *
+  * @return mixed the stored counter, or 0 when it is falsy
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_fields_num()
+ {
+     $stored = self::$use_redis ? queue::get("fields_num") : self::$fields_num;
+     if (!$stored)
+     {
+         return 0;
+     }
+     return $stored;
+ }
+
+ /**
+  * Extract a field with selector::select() in its default mode (used for xpath selectors).
+  *
+  * @param mixed $html      HTML to search
+  * @param mixed $selector  selector expression
+  * @param mixed $fieldname field name, only used in the error log line
+  * @return mixed whatever selector::select() returns
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_fields_xpath($html, $selector, $fieldname)
+ {
+     $matched = selector::select($html, $selector);
+     // Log any selector error; the result is returned either way.
+     if (selector::$error)
+     {
+         log::error('Field("'.$fieldname.'") '.selector::$error."\n");
+     }
+     return $matched;
+ }
+
+ /**
+  * Extract a field with selector::select() in 'regex' mode.
+  *
+  * @param mixed $html      HTML to search
+  * @param mixed $selector  regular expression
+  * @param mixed $fieldname field name, only used in the error log line
+  * @return mixed whatever selector::select() returns
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_fields_regex($html, $selector, $fieldname)
+ {
+     $matched = selector::select($html, $selector, 'regex');
+     // Log any selector error; the result is returned either way.
+     if (selector::$error)
+     {
+         log::error('Field("'.$fieldname.'") '.selector::$error."\n");
+     }
+     return $matched;
+ }
+
+ /**
+  * Extract a field with selector::select() in 'css' mode.
+  *
+  * @param mixed $html      HTML to search
+  * @param mixed $selector  CSS selector
+  * @param mixed $fieldname field name, only used in the error log line
+  * @return mixed whatever selector::select() returns
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_fields_css($html, $selector, $fieldname)
+ {
+     $matched = selector::select($html, $selector, 'css');
+     // Log any selector error; the result is returned either way.
+     if (selector::$error)
+     {
+         log::error('Field("'.$fieldname.'") '.selector::$error."\n");
+     }
+     return $matched;
+ }
+
+ /**
+  * Clear the shell output by printing the byte sequence ESC [ H ESC [ 2 J.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function clear_echo()
+ {
+     // Bytes 27,91,72 followed by 27,91,50,74.
+     print "\033[H\033[2J";
+ }
+
+ /**
+  * Print $message in place of the output of the previous call.
+  *
+  * The number of terminal lines printed by the previous call is remembered in a
+  * static variable; that many lines are erased before the new message is printed.
+  *
+  * Changes from the previous version: the terminal width is cast to an integer and
+  * falls back to 64 when it is not positive, and an empty line counts as one
+  * terminal line (str_split('') returns an empty array since PHP 8.2, which made
+  * the old code erase too few lines).
+  *
+  * @param mixed $message           text to print; may contain several lines
+  * @param mixed $force_clear_lines when not null, overrides the remembered number of lines to erase
+  * @return void
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function replace_echo($message, $force_clear_lines = NULL)
+ {
+     static $last_lines = 0;
+
+     if(!is_null($force_clear_lines))
+     {
+         $last_lines = $force_clear_lines;
+     }
+
+     // Ask the terminal for its width.
+     $toss = $status = null;
+     $term_width = (int) exec('tput cols', $toss, $status);
+     if($status || $term_width <= 0)
+     {
+         $term_width = 64; // Arbitrary fall-back term width.
+     }
+
+     // Count the terminal lines the message occupies, including wrapped ones.
+     $line_count = 0;
+     foreach(explode("\n", $message) as $line)
+     {
+         $line_count += max(1, (int) ceil(strlen($line) / $term_width));
+     }
+
+     // Erase as many lines as the last output had: erase the current line,
+     // move the cursor up one line, erase that line too.
+     for($i = 0; $i < $last_lines; $i++)
+     {
+         echo "\r\033[K\033[1A\r\033[K\r";
+     }
+
+     $last_lines = $line_count;
+
+     echo $message."\n";
+ }
+
+ /**
+  * Render the status screen (per the original note, never reached on Windows).
+  *
+  * Builds the banner, the version / start time / name / load lines, the task,
+  * server (multi-server mode only) and collect panels, and prints the result
+  * through replace_echo().
+  *
+  * @return void
+  */
+ public function display_ui()
+ {
+     $loadavg = sys_getloadavg();
+     foreach ($loadavg as $idx => $load)
+     {
+         $loadavg[$idx] = round($load, 2);
+     }
+
+     $run_time_str = util::time2second(time()-self::$time_start, false);
+
+     $parts = array();
+     $parts[] = "\033[1A\n\033[K-----------------------------\033[47;30m PHPSPIDER \033[0m-----------------------------\n\033[0m";
+     $parts[] = 'PHPSpider version:' . self::VERSION . " PHP version:" . PHP_VERSION . "\n";
+     $parts[] = 'start time:'. date('Y-m-d H:i:s', self::$time_start).' run ' . $run_time_str . " \n";
+     $parts[] = 'spider name: ' . self::$configs['name'] . "\n";
+     if (self::$multiserver)
+     {
+         $parts[] = 'server id: ' . self::$serverid."\n";
+     }
+     $parts[] = 'task number: ' . self::$tasknum . "\n";
+     $parts[] = 'load average: ' . implode(", ", $loadavg) . "\n";
+     $parts[] = "document: https://doc.phpspider.org\n";
+
+     // Panels: tasks, then servers (multi-server mode only), then collect counters.
+     $parts[] = $this->display_task_ui();
+     if (self::$multiserver)
+     {
+         $parts[] = $this->display_server_ui();
+     }
+     $parts[] = $this->display_collect_ui();
+
+     $parts[] = "---------------------------------------------------------------------\n";
+     $parts[] = "Press Ctrl-C to quit. Start success.";
+     if (self::$terminate)
+     {
+         $parts[] = "\n\033[33mWait for the process exits...\033[0m";
+     }
+
+     $this->replace_echo(implode('', $parts));
+ }
+
+ /**
+  * Render the TASKS panel: a header row and one row per task of this server.
+  *
+  * @return string the panel text
+  */
+ public function display_task_ui()
+ {
+     // Highlighted header label followed by spaces up to $width characters.
+     $head = function ($label, $width)
+     {
+         return "\033[47;30m".$label."\033[0m".str_pad('', $width-strlen($label));
+     };
+
+     $display_str = "-------------------------------\033[47;30m TASKS \033[0m-------------------------------\n";
+     $display_str .= $head('taskid', self::$taskid_length+2)
+         . $head('taskpid', self::$pid_length+2)
+         . $head('mem', self::$mem_length+2)
+         . $head('collect succ', self::$urls_length)
+         . $head('collect fail', self::$urls_length)
+         . $head('speed', self::$speed_length+2)
+         . "\n";
+
+     foreach ($this->get_task_status_list(self::$serverid, self::$tasknum) as $json)
+     {
+         $task = json_decode($json, true);
+         // Entries that do not decode to a non-empty status are skipped.
+         if (empty($task))
+         {
+             continue;
+         }
+         $row = str_pad($task['id'], self::$taskid_length+2);
+         $row .= str_pad($task['pid'], self::$pid_length+2);
+         $row .= str_pad($task['mem']."MB", self::$mem_length+2);
+         $row .= str_pad($task['collect_succ'], self::$urls_length);
+         $row .= str_pad($task['collect_fail'], self::$urls_length);
+         $row .= str_pad($task['speed']."/s", self::$speed_length+2);
+         $display_str .= $row."\n";
+     }
+     return $display_str;
+ }
+
+ /**
+  * Render the SERVER panel: a header row and one row per registered server, each
+  * row showing the totals over that server's tasks.
+  *
+  * A missing or undecodable "server_list" entry yields the header only; the old
+  * code passed the failed json_decode() result straight to foreach.
+  *
+  * @return string the panel text
+  */
+ public function display_server_ui()
+ {
+     $display_str = "-------------------------------\033[47;30m SERVER \033[0m------------------------------\n";
+
+     $display_str .= "\033[47;30mserver\033[0m". str_pad('', self::$server_length+2-strlen('serverid')).
+         "\033[47;30mtasknum\033[0m". str_pad('', self::$tasknum_length+2-strlen('tasknum')).
+         "\033[47;30mmem\033[0m". str_pad('', self::$mem_length+2-strlen('mem')).
+         "\033[47;30mcollect succ\033[0m". str_pad('', self::$urls_length-strlen('collect succ')).
+         "\033[47;30mcollect fail\033[0m". str_pad('', self::$urls_length-strlen('collect fail')).
+         "\033[47;30mspeed\033[0m". str_pad('', self::$speed_length+2-strlen('speed')).
+         "\n";
+
+     $server_list = json_decode(queue::get("server_list"), true);
+     if (!is_array($server_list))
+     {
+         // Nothing registered yet, or the stored value is not valid JSON.
+         return $display_str;
+     }
+
+     foreach ($server_list as $server)
+     {
+         $serverid = $server['serverid'];
+         $tasknum = $server['tasknum'];
+         $mem = 0;
+         $speed = 0;
+         $collect_succ = $collect_fail = 0;
+         // Sum the figures of every task that has published a status.
+         foreach ($this->get_task_status_list($serverid, $tasknum) as $json)
+         {
+             $task = json_decode($json, true);
+             if (empty($task))
+             {
+                 continue;
+             }
+             $mem += $task['mem'];
+             $speed += $task['speed'];
+             $collect_fail += $task['collect_fail'];
+             $collect_succ += $task['collect_succ'];
+         }
+
+         $display_str .= str_pad($serverid, self::$server_length).
+             str_pad($tasknum, self::$tasknum_length+2).
+             str_pad($mem."MB", self::$mem_length+2).
+             str_pad($collect_succ, self::$urls_length).
+             str_pad($collect_fail, self::$urls_length).
+             str_pad($speed."/s", self::$speed_length+2).
+             "\n";
+     }
+     return $display_str;
+ }
+
+ /**
+  * Render the COLLECT STATUS panel: found pages, queue length, crawled pages,
+  * extracted records and crawl depth.
+  *
+  * @return string the panel text
+  */
+ public function display_collect_ui()
+ {
+     // Highlighted header label followed by spaces up to $width characters.
+     $head = function ($label, $width)
+     {
+         return "\033[47;30m".$label."\033[0m".str_pad('', $width-strlen($label));
+     };
+
+     $display_str = "---------------------------\033[47;30m COLLECT STATUS \033[0m--------------------------\n";
+     $display_str .= $head('find pages', 16)
+         . $head('queue', 14)
+         . $head('collected', 15)
+         . $head('fields', 15)
+         . $head('depth', 12)
+         . "\n";
+
+     $collect = $this->get_collect_url_num();
+     $collected = $this->get_collected_url_num();
+     $queue = $this->queue_lsize();
+     $fields = $this->get_fields_num();
+     $depth = $this->get_depth_num();
+
+     return $display_str
+         . str_pad($collect, 16)
+         . str_pad($queue, 14)
+         . str_pad($collected, 15)
+         . str_pad($fields, 15)
+         . str_pad($depth, 12)
+         . "\n";
+ }
+
+ /**
+ * 判断是否附件文件
+ *
+ * @return void
+ * @author seatle
+ * @created time :2016-09-23 17:13
+ */
+ //public function is_attachment_file($url)
+ //{
+ //$mime_types = $GLOBALS['config']['mimetype'];
+ //$mime_types_flip = array_flip($mime_types);
+
+ //$pathinfo = pathinfo($url);
+ //$fileext = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';
+
+ //$fileinfo = array();
+ //// 存在文件后缀并且是配置里面的后缀
+ //if (!empty($fileext) && isset($mime_types_flip[$fileext]))
+ //{
+ //stream_context_set_default(
+ //array(
+ //'http' => array(
+ //'method' => 'HEAD'
+ //)
+ //)
+ //);
+ //// 代理和Cookie以后实现, 方法和 file_get_contents 一样 使用 stream_context_create 设置
+ //$headers = get_headers($url, 1);
+ //if (strpos($headers[0], '302'))
+ //{
+ //$url = $headers['Location'];
+ //$headers = get_headers($url, 1);
+ //}
+ ////print_r($headers);
+ //$fileinfo = array(
+ //'basename' => isset($pathinfo['basename']) ? $pathinfo['basename'] : '',
+ //'filename' => isset($pathinfo['filename']) ? $pathinfo['filename'] : '',
+ //'fileext' => isset($pathinfo['extension']) ? $pathinfo['extension'] : '',
+ //'filesize' => isset($headers['Content-Length']) ? $headers['Content-Length'] : 0,
+ //'atime' => isset($headers['Date']) ? strtotime($headers['Date']) : time(),
+ //'mtime' => isset($headers['Last-Modified']) ? strtotime($headers['Last-Modified']) : time(),
+ //);
+
+ //$mime_type = 'html';
+ //$content_type = isset($headers['Content-Type']) ? $headers['Content-Type'] : '';
+ //if (!empty($content_type))
+ //{
+ //$mime_type = isset($GLOBALS['config']['mimetype'][$content_type]) ? $GLOBALS['config']['mimetype'][$content_type] : $mime_type;
+ //}
+ //$mime_types_flip = array_flip($mime_types);
+ //// 判断一下是不是文件名被加什么后缀了, 比如 http://www.xxxx.com/test.jpg?token=xxxxx
+ //if (!isset($mime_types_flip[$fileinfo['fileext']]))
+ //{
+ //$fileinfo['fileext'] = $mime_type;
+ //$fileinfo['basename'] = $fileinfo['filename'].'.'.$mime_type;
+ //}
+ //}
+ //return $fileinfo;
+ //}
+
+}
+
+
diff --git a/vendor/owner888/phpspider/core/phpspider.php b/vendor/owner888/phpspider/core/phpspider.php
new file mode 100644
index 0000000..0652c0b
--- /dev/null
+++ b/vendor/owner888/phpspider/core/phpspider.php
@@ -0,0 +1,3598 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider核心类文件
+// ***********
+// 泛域名抓取优化版 BY KEN a-site@foxmail.com
+// ***********
+// * 泛域名设置:domain = array('*')
+// * 增加子域名数量限制 $max_sub_num = 100
+//----------------------------------
+
+namespace phpspider\core;
+
+require_once __DIR__.'/constants.php';
+
+use Exception;
+use phpspider\core\db;
+use phpspider\core\log;
+use phpspider\core\queue;
+use phpspider\core\requests;
+use phpspider\core\selector;
+use phpspider\core\util;
+
+ // Prepare the data directory tree at load time: the root first, then each working
+ // sub-directory, each one handed to util::path_exists in the same order as before.
+ foreach (array('', '/lock', '/log', '/cache', '/status') as $data_subdir)
+ {
+     util::path_exists(PATH_DATA.$data_subdir);
+ }
+ unset($data_subdir);
+
+class phpspider
+{
+ /**
+ * 版本号
+ * @var string
+ */
+ const VERSION = '2.1.5';
+
+ /**
+ * 爬虫爬取每个网页的时间间隔,0表示不延时, 单位: 毫秒
+ */
+ const INTERVAL = 100;
+
+ /**
+ * 爬虫爬取每个网页的超时时间, 单位: 秒
+ */
+ const TIMEOUT = 5;
+
+ /**
+ * 爬取失败次数, 不想失败重新爬取则设置为0
+ */
+ const MAX_TRY = 0;
+
+ /**
+ * 爬虫爬取网页所使用的浏览器类型: pc/Mac、ios、android
+ * 默认类型是PC
+ */
+ const AGENT_PC = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36';
+ const AGENT_IOS = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_3 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G34 Safari/601.1';
+ const AGENT_ANDROID = 'Mozilla/5.0 (Linux; U; Android 6.0.1;zh_cn; Le X820 Build/FEXCNFN5801507014S) AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 Chrome/49.0.0.0 Mobile Safari/537.36 EUI Browser/5.8.015S';
+
+ /**
+ * pid文件的路径及名称
+ * @var string
+ */
+ //public static $pid_file = '';
+
+ /**
+ * 日志目录, 默认在data根目录下
+ * @var mixed
+ */
+ //public static $log_file = '';
+
+ /**
+ * 主任务进程ID
+ */
+ //public static $master_pid = 0;
+
+ /**
+ * 所有任务进程ID
+ */
+ //public static $taskpids = array();
+
+ /**
+ * Daemonize.
+ *
+ * @var bool
+ */
+ public static $daemonize = false;
+
+ /**
+ * 当前进程是否终止
+ */
+ public static $terminate = false;
+
+ /**
+ * 是否分布式
+ */
+ public static $multiserver = false;
+
+ /**
+ * 当前服务器ID
+ */
+ public static $serverid = 1;
+
+ /**
+ * 主任务进程
+ */
+ public static $taskmaster = true;
+
+ /**
+ * 当前任务ID
+ */
+ public static $taskid = 1;
+
+ /**
+ * 当前任务进程ID
+ */
+ public static $taskpid = 1;
+
+ /**
+ * 并发任务数
+ */
+ public static $tasknum = 1;
+
+ /**
+ * Whether the master task has already forked its child task processes
+ */
+ public static $fork_task_complete = false;
+
+ /**
+ * 是否使用Redis
+ */
+ public static $use_redis = false;
+
+ /**
+ * 是否保存爬虫运行状态
+ */
+ public static $save_running_state = false;
+
+ /**
+ * 配置
+ */
+ public static $configs = array();
+
+ /**
+ * 要抓取的URL队列
+ md5(url) => array(
+ 'url' => '', // 要爬取的URL
+ 'url_type' => '', // 要爬取的URL类型,scan_page、list_page、content_page
+ 'method' => 'get', // 默认为"GET"请求, 也支持"POST"请求
+ 'headers' => array(), // 此url的Headers, 可以为空
+ 'params' => array(), // 发送请求时需添加的参数, 可以为空
+ 'context_data'=> '', // 此url附加的数据, 可以为空
+ 'proxy' => false, // 是否使用代理
+ 'try_num' => 0 // 抓取次数
+ 'max_try' => 0 // 允许抓取失败次数
+ )
+ */
+ public static $collect_queue = array();
+
+ /**
+ * 要抓取的URL数组
+ * md5($url) => time()
+ */
+ public static $collect_urls = array();
+
+ /**
+ * 要抓取的URL数量
+ */
+ public static $collect_urls_num = 0;
+
+ /**
+ * 已经抓取的URL数量
+ */
+ public static $collected_urls_num = 0;
+
+ /**
+ * 当前进程采集成功数
+ */
+ public static $collect_succ = 0;
+
+ /**
+ * 当前进程采集失败数
+ */
+ public static $collect_fail = 0;
+
+ /**
+ * 提取到的字段数
+ */
+ public static $fields_num = 0;
+
+ /**
+ * 【KEN】提取到的页面数按域名计数容器 结构为 domain => number
+ */
+ public static $pages_num = array();
+
+ /**
+ * 【KEN】单域名允许抓取的最大页面数,0为不限制
+ */
+ public static $max_pages = 0;
+
+ /**
+ * 【KEN】花费的抓取时长计数容器 结构为 domain => number
+ */
+ public static $duration = array();
+
+ /**
+ * 【KEN】单域名允许抓取的最大时长,单位秒,0为不限制
+ */
+ public static $max_duration = 0;
+
+ /**
+ * 【KEN】单域名最大子域名发现数量 防止掉进蜘蛛池,推荐值:3000(多数大型网站上限)
+ */
+ public static $max_sub_num = 3000; //建议值 3000
+
+ /**
+ * 【KEN】子进程未获取任务,超时退出前,等待计时器
+ */
+
+ public static $stand_by_time = 0;
+
+ /**
+ * 【KEN】子进程未获取任务,超时退出前,最大等待时长/秒,全部任务束后,子进程将会等待的时间,以便有缓冲时间,获得新的任务
+ */
+ public static $max_stand_by_time = 60; //建议值 60
+
+ /**
+ * 【KEN】每个主机并发上限,降低对方网站流量压力和减少被阻挡概率,建议值 6 ,须与 queue_order = rand 一起使用
+ */
+ public static $max_task_per_host = 0; //0值和非0值会使用不同类型的队列缓存库,从0改为非0值或从非0值改为0需清空队列缓存库再运行,否则任务无法添加
+ public static $task_per_host_counter = array(); //计数容器
+
+ /**
+ * 采集深度
+ */
+ public static $depth_num = 0;
+
+ /**
+ * 爬虫开始时间
+ */
+ public static $time_start = 0;
+
+ /**
+ * 任务状态
+ */
+ public static $task_status = array();
+
+ // 导出类型配置
+ public static $export_type = '';
+ public static $export_file = '';
+ public static $export_conf = '';
+ public static $export_table = '';
+
+ // 数据库配置
+ public static $db_config = array();
+ // 队列配置
+ public static $queue_config = array();
+
+ // 运行面板参数长度
+ public static $server_length = 10;
+ public static $tasknum_length = 8;
+ public static $taskid_length = 8;
+ public static $pid_length = 8;
+ public static $mem_length = 8;
+ public static $urls_length = 15;
+ public static $speed_length = 6;
+
+ /**
+ * 爬虫初始化时调用, 用来指定一些爬取前的操作
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_start = null;
+
+ /**
+ * URL采集前调用
+ * 比如有时需要根据某个特定的URL,来决定这次的请求是否使用代理 / 或使用哪个代理
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_before_download_page = null;
+
+ /**
+ * 网页状态码回调
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_status_code = null;
+
+ /**
+ * 判断当前网页是否被反爬虫, 需要开发者实现
+ *
+ * @var mixed
+ * @access public
+ */
+ public $is_anti_spider = null;
+
+ /**
+ * 在一个网页下载完成之后调用, 主要用来对下载的网页进行处理
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_download_page = null;
+
+ /**
+ * 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_download_attached_page = null;
+
+ /**
+ * 当前页面抽取到URL
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_fetch_url = null;
+
+ /**
+ * URL属于入口页
+ * 在爬取到入口url的内容之后, 添加新的url到待爬队列之前调用
+ * 主要用来发现新的待爬url, 并且能给新发现的url附加数据
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_scan_page = null;
+
+ /**
+ * URL属于列表页
+ * 在爬取到列表页url的内容之后, 添加新的url到待爬队列之前调用
+ * 主要用来发现新的待爬url, 并且能给新发现的url附加数据
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_list_page = null;
+
+ /**
+ * URL属于内容页
+ * 在爬取到内容页url的内容之后, 添加新的url到待爬队列之前调用
+ * 主要用来发现新的待爬url, 并且能给新发现的url附加数据
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_content_page = null;
+
+ /**
+ * 在抽取到field内容之后调用, 对其中包含的img标签进行回调处理
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_handle_img = null;
+
+ /**
+ * 当一个field的内容被抽取到后进行的回调, 在此回调中可以对网页中抽取的内容作进一步处理
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_extract_field = null;
+
+ /**
+ * 在一个网页的所有field抽取完成之后, 可能需要对field进一步处理, 以发布到自己的网站
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_extract_page = null;
+
+ /**
+ * 如果抓取的页面是一个附件文件, 比如图片、视频、二进制文件、apk、ipad、exe
+ * 就不去分析他的内容提取field了, 提取field只针对HTML
+ *
+ * @var mixed
+ * @access public
+ */
+ public $on_attachment_file = null;
+
+ /**
+  * Build a spider from a configuration array.
+  *
+  * Fills in defaults for every option that is not set, copies the export/db/queue
+  * settings and the multi-task/multi-server switches into the static properties,
+  * and finally stores the normalised array in self::$configs.
+  *
+  * @param array $configs user supplied options (name, proxy, user_agent, interval, timeout, ...)
+  */
+ public function __construct($configs = array())
+ {
+     // Enable ticks; per the original note this works around Ctrl+C not stopping the spider under PHP 7
+     declare(ticks = 1);
+
+     // Show log output from the very beginning so validation errors are visible
+     log::$log_show = true;
+     if (isset($configs['log_file']))
+     {
+         log::$log_file = $configs['log_file'];
+     }
+     else
+     {
+         log::$log_file = PATH_DATA.'/phpspider.log';
+     }
+     if (isset($configs['log_type']))
+     {
+         log::$log_type = $configs['log_type'];
+     }
+     else
+     {
+         log::$log_type = false;
+     }
+
+     // Easter egg: the entry script must still contain both marker comments
+     $loaded_files = get_included_files();
+     $entry_source = file_get_contents($loaded_files[0]);
+     if (!(preg_match("#/\* Do NOT delete this comment \*/#", $entry_source) && preg_match("#/\* 不要删除这段注释 \*/#", $entry_source)))
+     {
+         log::error("Unknown error...");
+         exit;
+     }
+
+     // Option => fallback used when the option is not set (or is null)
+     $fallbacks = array(
+         'name'              => 'phpspider',
+         'proxy'             => false,
+         'user_agent'        => self::AGENT_PC,
+         'client_ip'         => array(),
+         'interval'          => self::INTERVAL,
+         'timeout'           => self::TIMEOUT,
+         'max_try'           => self::MAX_TRY,
+         'max_depth'         => 0,
+         'max_fields'        => 0,
+         'export'            => array(),
+         // Options added by KEN
+         'max_pages'         => self::$max_pages,
+         'max_duration'      => self::$max_duration,
+         'max_sub_num'       => self::$max_sub_num,
+         'max_stand_by_time' => self::$max_stand_by_time,
+         'max_task_per_host' => self::$max_task_per_host,
+     );
+     foreach ($fallbacks as $option => $fallback)
+     {
+         if (!isset($configs[$option]))
+         {
+             $configs[$option] = $fallback;
+         }
+     }
+
+     // A per-host concurrency limit forces the queue into random order
+     if ($configs['max_task_per_host'] > 0)
+     {
+         $configs['queue_order'] = 'rand';
+     }
+     elseif (!isset($configs['queue_order']))
+     {
+         $configs['queue_order'] = 'list';
+     }
+
+     // Export settings (csv, sql, db) plus database and queue configuration
+     $export = $configs['export'];
+     self::$export_type  = isset($export['type']) ? $export['type'] : '';
+     self::$export_file  = isset($export['file']) ? $export['file'] : '';
+     self::$export_table = isset($export['table']) ? $export['table'] : '';
+     self::$db_config    = isset($configs['db_config']) ? $configs['db_config'] : array();
+     self::$queue_config = isset($configs['queue_config']) ? $configs['queue_config'] : array();
+
+     // Concurrent tasks are only honoured when more than one is requested and we are not on Windows
+     if (isset($configs['tasknum']) && $configs['tasknum'] > 1 && !util::is_win())
+     {
+         self::$tasknum = $configs['tasknum'];
+     }
+
+     // Keep-running-state flag, multi-server flag and current server id: copied only when provided
+     foreach (array('save_running_state', 'multiserver', 'serverid') as $switch)
+     {
+         if (isset($configs[$switch]))
+         {
+             self::$$switch = $configs[$switch];
+         }
+     }
+
+     // Projects are told apart by a queue prefix suffixed with the first 4 hex chars of md5(name);
+     // the short hash keeps the keys small
+     if (isset(self::$queue_config['prefix']))
+     {
+         self::$queue_config['prefix'] .= '-'.substr(md5($configs['name']), 0, 4);
+     }
+
+     self::$configs = $configs;
+ }
+
+ /**
+  * Look up one configuration entry.
+  *
+  * @param string $name configuration key
+  * @return mixed the stored value, or an empty array when the key is missing
+  *               or its value is empty in the sense of empty()
+  */
+ public function get_config($name)
+ {
+     if (empty(self::$configs[$name]))
+     {
+         return array();
+     }
+
+     return self::$configs[$name];
+ }
+
+ /**
+  * Push an entry URL onto the collect queue.
+  *
+  * The URL is queued as a scan page unless it matches the content-page or
+  * list-page rules, in which case it is queued with that type instead.
+  *
+  * @param string $url            URL to queue
+  * @param mixed  $options        extra link fields (headers, params, ...), merged into the queued link
+  * @param bool   $allowed_repeat forwarded to queue_lpush
+  * @return mixed false when the sub-domain limit is exceeded, otherwise the result of queue_lpush
+  */
+ public function add_scan_url($url, $options = array(), $allowed_repeat = true)
+ {
+     // Sub-domain limit: drop the URL once its domain has produced too many sub-domains
+     if ( ! empty(self::$configs['max_sub_num']))
+     {
+         $found_subdomains = $this->sub_domain_count($url);
+         if ($found_subdomains > self::$configs['max_sub_num'])
+         {
+             log::debug('Task('.self::$taskid.') subdomin = '.$found_subdomains.' more than '.self::$configs['max_sub_num'].",add_scan_url $url [Skip]");
+             return false;
+         }
+     }
+
+     $task = $options;
+     $task['url'] = $url;
+     $task['url_type'] = 'scan_page';
+     $task = $this->link_uncompress($task);
+
+     // Content-page rules win over list-page rules; anything else stays a scan page
+     if ($this->is_content_page($url))
+     {
+         $task['url_type'] = 'content_page';
+     }
+     elseif ($this->is_list_page($url))
+     {
+         $task['url_type'] = 'list_page';
+     }
+
+     $pushed = $this->queue_lpush($task, $allowed_repeat);
+     if ($pushed)
+     {
+         switch ($task['url_type'])
+         {
+             case 'scan_page':
+                 log::debug("Find scan page: {$url}");
+                 break;
+             case 'content_page':
+                 log::debug("Find content page: {$url}");
+                 break;
+             case 'list_page':
+                 log::debug("Find list page: {$url}");
+                 break;
+         }
+     }
+
+     return $pushed;
+ }
+
+ /**
+  * Add a URL to the collect queue; usually called from the on_scan_page and
+  * on_list_page callbacks.
+  * When two processes call this with the same URL at the same time the URL can
+  * end up in the queue twice.
+  *
+  * Only URLs that match the content-page or list-page rules are queued.
+  *
+  * @param mixed $url     URL to queue
+  * @param mixed $options extra link fields, merged into the queued link
+  * @param int   $depth   crawl depth recorded on the link
+  * @return mixed false when the URL is skipped, otherwise the result of queue_lpush
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function add_url($url, $options = array(), $depth = 0)
+ {
+     // Sub-domain limit: drop URLs whose domain already exceeded max_sub_num sub-domains
+     if ( ! empty(self::$configs['max_sub_num']))
+     {
+         $found_subdomains = $this->sub_domain_count($url);
+         if ($found_subdomains > self::$configs['max_sub_num'])
+         {
+             log::debug('Task('.self::$taskid.') subdomin = '.$found_subdomains.' more than '.self::$configs['max_sub_num'].",add_url $url [Skip]");
+             return false;
+         }
+     }
+
+     $task = $options;
+     $task['url'] = $url;
+     $task['depth'] = $depth;
+     $task = $this->link_uncompress($task);
+
+     // Classify the URL; anything that is neither a content page nor a list page is not queued
+     if ($this->is_content_page($url))
+     {
+         $task['url_type'] = 'content_page';
+     }
+     elseif ($this->is_list_page($url))
+     {
+         $task['url_type'] = 'list_page';
+     }
+     else
+     {
+         return false;
+     }
+
+     $pushed = $this->queue_lpush($task);
+     if ($pushed)
+     {
+         switch ($task['url_type'])
+         {
+             case 'scan_page':
+                 log::debug("Find scan page: {$url}");
+                 break;
+             case 'content_page':
+                 log::debug("Find content page: {$url}");
+                 break;
+             case 'list_page':
+                 log::debug("Find list page: {$url}");
+                 break;
+         }
+     }
+
+     return $pushed;
+ }
+
+ /**
+  * Whether a URL is acceptable as an entry (scan) page.
+  *
+  * A URL qualifies when it has a host and either the first configured domain
+  * is the wildcard '*' or the host is one of the configured domains.
+  * A missing 'domains' option is treated as an empty list (nothing matches)
+  * instead of raising an undefined-index notice / in_array() type error, and a
+  * single domain given as a string is accepted as a one-element list.
+  *
+  * @param mixed $url
+  * @return bool
+  * @author seatle
+  * @created time :2016-10-12 19:06
+  */
+ public function is_scan_page($url)
+ {
+     $parse_url = parse_url($url);
+     // parse_url() yields false for malformed URLs; empty() covers that case too
+     if (empty($parse_url['host']))
+     {
+         return false;
+     }
+
+     $domains = isset(self::$configs['domains']) ? (array) self::$configs['domains'] : array();
+
+     // 2018-1-3: wildcard, every domain is allowed
+     if (isset($domains[0]) && $domains[0] === '*')
+     {
+         return true;
+     }
+
+     // Restricted to the configured domains; strict comparison avoids numeric-string juggling
+     return in_array($parse_url['host'], $domains, true);
+ }
+
+ /**
+  * Whether a URL is a list page.
+  *
+  * Special values for the first entry of list_url_regexes:
+  *  - 'x': there are no list pages at all
+  *  - '*': every page is a list page (links only, no content extraction)
+  *
+  * @param mixed $url
+  * @return bool
+  * @author seatle
+  * @created time :2016-10-12 19:06
+  */
+ public function is_list_page($url)
+ {
+     // Filter out downloadable / static file types (20180209)
+     if (preg_match('/\.(zip|7z|cab|rar|iso|gho|jar|ace|tar|gz|bz2|z|xml|pdf|doc|txt|rtf|snd|xls|xlsx|docx|apk|ipa|flv|midi|mps|pls|pps|ppa|pwz|mp3|mp4|mpeg|mpe|asf|asx|mpg|3gp|mov|m4v|mkv|vob|vod|mod|ogg|rm|rmvb|wmv|avi|dat|exe|wps|js|css|bmp|jpg|png|gif|ico|tiff|jpeg|svg|webp|mpa|mdb|bin)$/iu', $url))
+     {
+         return false;
+     }
+
+     // Exclusion patterns for list pages (BY KEN): any match rejects the URL
+     if ( ! empty(self::$configs['list_url_regexes_remove']))
+     {
+         foreach (self::$configs['list_url_regexes_remove'] as $excluded)
+         {
+             if (preg_match("#{$excluded}#i", $url))
+             {
+                 return false;
+             }
+         }
+     }
+
+     // No list patterns configured: nothing is a list page
+     if (empty(self::$configs['list_url_regexes']))
+     {
+         return false;
+     }
+
+     $patterns = self::$configs['list_url_regexes'];
+
+     // 'x': no list pages, every page is crawled for content
+     if ($patterns[0] == 'x')
+     {
+         return false;
+     }
+
+     // '*': every page is a list page
+     if ($patterns[0] == '*')
+     {
+         return true;
+     }
+
+     foreach ($patterns as $pattern)
+     {
+         if (preg_match("#{$pattern}#i", $url))
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Whether a URL is a content page.
+  *
+  * When content_url_regexes is empty, or its first entry is '*', every page is
+  * a content page; when its first entry is 'x' no page is.
+  *
+  * @param mixed $url
+  * @return bool
+  * @author seatle
+  * @created time :2016-10-12 19:06
+  */
+ public function is_content_page($url)
+ {
+     // Filter out downloadable / static file types (20180209)
+     if (preg_match('/\.(zip|7z|cab|rar|iso|gho|jar|ace|tar|gz|bz2|z|xml|pdf|doc|txt|rtf|snd|xls|xlsx|docx|apk|ipa|flv|midi|mps|pls|pps|ppa|pwz|mp3|mp4|mpeg|mpe|asf|asx|mpg|3gp|mov|m4v|mkv|vob|vod|mod|ogg|rm|rmvb|wmv|avi|dat|exe|wps|js|css|bmp|jpg|png|gif|ico|tiff|jpeg|svg|webp|mpa|mdb|bin)$/iu', $url))
+     {
+         return false;
+     }
+
+     // Exclusion patterns for content pages (BY KEN): any match rejects the URL
+     if ( ! empty(self::$configs['content_url_regexes_remove']))
+     {
+         foreach (self::$configs['content_url_regexes_remove'] as $excluded)
+         {
+             if (preg_match("#{$excluded}#i", $url))
+             {
+                 return false;
+             }
+         }
+     }
+
+     // No content patterns configured: every page has its content extracted
+     if (empty(self::$configs['content_url_regexes']))
+     {
+         return true;
+     }
+
+     $patterns = self::$configs['content_url_regexes'];
+
+     // '*': every page is a content page
+     if ($patterns[0] == '*')
+     {
+         return true;
+     }
+
+     // 'x': list-only mode, no page has its content extracted
+     if ($patterns[0] == 'x')
+     {
+         return false;
+     }
+
+     foreach ($patterns as $pattern)
+     {
+         if (preg_match("#{$pattern}#i", $url))
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Parse the command line.
+  * php yourfile.php start | stop | status | kill
+  *
+  * start [-d] : returns to the caller; "-d" switches on daemon mode
+  * stop       : sends SIGINT to every process whose "ps aux" line contains the script name, then exits
+  * kill       : sends SIGKILL to those processes
+  * status     : exits immediately (nothing is displayed by this method)
+  * anything else prints the usage line and exits
+  *
+  * The script name is matched literally (grep -F) and passed through
+  * escapeshellarg(), so paths containing spaces or shell/regex metacharacters
+  * neither break the pipeline nor match unrelated processes.
+  *
+  * @return void
+  */
+ public function parse_command()
+ {
+     // Command line arguments of the running script
+     global $argv;
+     $start_file = $argv[0];
+
+     // Main command, defaults to "start"
+     $command = isset($argv[1]) ? trim($argv[1]) : 'start';
+
+     // Sub command, only -d is supported for now
+     $command2 = isset($argv[2]) ? $argv[2] : '';
+
+     // Pipeline printing the PID column of every process started from this script
+     $list_pids = 'ps aux | grep -F -- '.escapeshellarg($start_file)." | grep -v grep | awk '{print \$2}'";
+
+     switch ($command)
+     {
+         // Start phpspider
+         case 'start':
+             if ($command2 === '-d')
+             {
+                 self::$daemonize = true;
+             }
+             break;
+         case 'stop':
+             exec($list_pids, $info);
+             // The "stop" invocation itself is listed too, so one line means no spider is running
+             if (count($info) <= 1)
+             {
+                 echo "PHPSpider[$start_file] not run\n";
+             }
+             else
+             {
+                 echo "PHPSpider[$start_file] stop success";
+                 exec($list_pids.' |xargs kill -SIGINT', $info);
+             }
+             exit;
+         case 'kill':
+             exec($list_pids.' |xargs kill -SIGKILL');
+             break;
+         // Show phpspider status
+         case 'status':
+             exit(0);
+         // Unknown command
+         default :
+             exit("Usage: php yourfile.php {start|stop|status|kill}\n");
+     }
+ }
+
+ /**
+  * Signal handler.
+  *
+  * SIGINT flags the current process for termination; SIGUSR2 prints a
+  * status placeholder. Other signals are ignored.
+  *
+  * @param int $signal
+  */
+ public function signal_handler($signal)
+ {
+     if ($signal == SIGINT)
+     {
+         // Stop: the collect loop notices the flag on its next check_terminate() call
+         log::warn('Program stopping...');
+         self::$terminate = true;
+     }
+     elseif ($signal == SIGUSR2)
+     {
+         // Show status
+         echo "show status\n";
+     }
+ }
+
+ /**
+  * Install the signal handlers (no-op when pcntl_signal is unavailable).
+  *
+  * @return void
+  */
+ public function install_signal()
+ {
+     if (!function_exists('pcntl_signal'))
+     {
+         return;
+     }
+
+     // SIGINT (stop) and SIGUSR2 (status) are both routed to signal_handler()
+     foreach (array(SIGINT, SIGUSR2) as $signo)
+     {
+         pcntl_signal($signo, array(&$this, 'signal_handler'), false);
+     }
+
+     // SIGPIPE is ignored
+     pcntl_signal(SIGPIPE, SIG_IGN, false);
+ }
+
+ /**
+  * Detach into daemon mode (only when self::$daemonize is set).
+  *
+  * Forks, lets the parent exit, starts a new session and forks once more.
+  *
+  * @throws Exception when a fork or setsid fails
+  */
+ protected static function daemonize()
+ {
+     if (self::$daemonize)
+     {
+         // The redis link must be closed before forking
+         queue::clear_link();
+
+         umask(0);
+
+         // First fork: the parent leaves, the child carries on
+         $first_fork = pcntl_fork();
+         if ($first_fork === -1)
+         {
+             throw new Exception('fork fail');
+         }
+         if ($first_fork > 0)
+         {
+             exit(0);
+         }
+
+         if (posix_setsid() === -1)
+         {
+             throw new Exception('setsid fail');
+         }
+
+         // Fork again avoid SVR4 system regain the control of terminal.
+         $second_fork = pcntl_fork();
+         if ($second_fork === -1)
+         {
+             throw new Exception('fork fail');
+         }
+         if ($second_fork !== 0)
+         {
+             exit(0);
+         }
+     }
+ }
+
+ /**
+  * Terminate the current process if a stop was requested.
+  *
+  * Returns false when no termination is pending. Otherwise the task status is
+  * removed, the master additionally waits for all child tasks, prints the final
+  * summary, and the process exits.
+  *
+  * @return bool false when the process keeps running (never returns otherwise)
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function check_terminate()
+ {
+     if (self::$terminate)
+     {
+         // Remove the status entry of the current task
+         $this->del_task_status(self::$serverid, self::$taskid);
+
+         if (self::$taskmaster)
+         {
+             // Wait until none of the child tasks (ids 2..tasknum) reports a status any more
+             do
+             {
+                 $child_alive = false;
+                 for ($child_id = 2; $child_id <= self::$tasknum; $child_id++)
+                 {
+                     if ($this->get_task_status(self::$serverid, $child_id))
+                     {
+                         $child_alive = true;
+                     }
+                 }
+                 if ($child_alive)
+                 {
+                     log::warn('Task stop waiting...');
+                     sleep(1);
+                 }
+             }
+             while ($child_alive);
+
+             $this->del_server_list(self::$serverid);
+
+             // Force log output on for the final summary
+             log::$log_show = true;
+
+             $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start));
+             log::note("Spider finished in {$spider_time_run}");
+
+             $get_collected_url_num = $this->get_collected_url_num();
+             log::note("Total pages: {$get_collected_url_num} \n");
+         }
+
+         exit();
+     }
+
+     return false;
+ }
+
+ /**
+ * Entry point of the spider.
+ *
+ * Parses the command line, validates the runtime (PHP version, curl, pcntl,
+ * Redis) and the configuration, prints the start banner, seeds the queue with
+ * the configured scan_urls, runs the on_start callback, optionally daemonizes,
+ * installs the signal handlers and finally runs the collect loop.
+ * Every failed validation logs an error and exits the process.
+ */
+ public function start()
+ {
+ $this->parse_command();
+
+ // Spider start time
+ self::$time_start = time();
+ // Current task ID
+ self::$taskid = 1;
+ // Process ID of the current task (1 when posix_getpid is unavailable)
+ self::$taskpid = function_exists('posix_getpid') ? posix_getpid() : 1;
+ self::$collect_succ = 0;
+ self::$collect_fail = 0;
+
+ //--------------------------------------------------------------------------------
+ // Pre-run validation
+ //--------------------------------------------------------------------------------
+
+ // Check the PHP version
+ if (version_compare(PHP_VERSION, '5.3.0', 'lt'))
+ {
+ log::error('PHP 5.3+ is required, currently installed version is: ' . phpversion());
+ exit;
+ }
+
+ // Check the curl extension
+ if(!function_exists('curl_init'))
+ {
+ log::error('The curl extension was not found');
+ exit;
+ }
+
+ // Multitasking needs the pcntl extension
+ if (self::$tasknum > 1 && !function_exists('pcntl_fork'))
+ {
+ log::error('Multitasking needs pcntl, the pcntl extension was not found');
+ exit;
+ }
+
+ // Daemon mode needs the pcntl extension
+ if (self::$daemonize && !function_exists('pcntl_fork'))
+ {
+ log::error('Daemonize needs pcntl, the pcntl extension was not found');
+ exit;
+ }
+
+ // Multi-server mode, saved running state and multitasking all need Redis
+ if ( self::$multiserver || self::$save_running_state || self::$tasknum > 1 )
+ {
+ self::$use_redis = true;
+
+ queue::set_connect('default', self::$queue_config);
+ if (!queue::init())
+ {
+ if ( self::$multiserver )
+ {
+ log::error('Multiserver needs Redis support, '.queue::$error);
+ exit;
+ }
+
+ if ( self::$tasknum > 1 )
+ {
+ log::error('Multitasking needs Redis support, '.queue::$error);
+ exit;
+ }
+
+ if ( self::$save_running_state )
+ {
+ log::error('Spider kept running state needs Redis support, '.queue::$error);
+ exit;
+ }
+ }
+ }
+
+ // Check the export configuration
+ $this->check_export();
+
+ // Check the cache
+ $this->check_cache();
+
+ // Check scan_urls
+ if (empty(self::$configs['scan_urls']))
+ {
+ log::error('No scan url to start');
+ exit;
+ }
+
+ foreach ( self::$configs['scan_urls'] as $url )
+ {
+ // Only the entry URLs from the configuration are checked; URLs added through add_scan_url are not
+ if (!$this->is_scan_page($url))
+ {
+ log::error("Domain of scan_urls (\"{$url}\") does not match the domains of the domain name");
+ exit;
+ }
+ }
+
+ // The panel cannot be displayed on Windows, so log output is forced on
+ if (util::is_win())
+ {
+ self::$configs['name'] = iconv('UTF-8', 'GB2312//IGNORE', self::$configs['name']);
+ log::$log_show = true;
+ }
+ // Log output is also shown in daemon mode
+ elseif (self::$daemonize)
+ {
+ log::$log_show = true;
+ }
+ else
+ {
+ log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false;
+ }
+
+ if (log::$log_show)
+ {
+ global $argv;
+ $start_file = $argv[0];
+
+ $header = '';
+ // ANSI colour codes are only emitted outside Windows
+ if ( ! util::is_win())
+ {
+ $header .= "\033[33m";
+ }
+
+ $header .= "\n[ ".self::$configs['name']." Spider ] is started...\n\n";
+ $header .= ' * PHPSpider Version: '.self::VERSION."\n";
+ $header .= " * Documentation: https://doc.phpspider.org\n";
+ $header .= ' * Task Number: '.self::$tasknum."\n\n";
+ $header .= "Input \"php $start_file stop\" to quit. Start success.\n";
+ if ( ! util::is_win())
+ {
+ $header .= "\033[0m";
+ }
+
+ log::note($header);
+ }
+
+ // In daemon mode, restore the configured log state (disabled)
+ //if (self::$daemonize)
+ //{
+ //log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false;
+ //}
+
+ // Original note: multitask and multi-server data must both be cleared; in multi-server mode only this server's own
+ $this->init_redis();
+
+ //--------------------------------------------------------------------------------
+ // Seed the queue and start collecting
+ //--------------------------------------------------------------------------------
+
+ // Add the entry URLs to the queue
+ foreach ( self::$configs['scan_urls'] as $url )
+ {
+ // false means duplicates are not allowed
+ $this->add_scan_url($url, null, false);
+ }
+
+ // Called at this point so that the callback can add further entry pages
+ if ($this->on_start)
+ {
+ call_user_func($this->on_start, $this);
+ }
+
+ if (!self::$daemonize)
+ {
+ if (!log::$log_show)
+ {
+ // Clear the screen the first time
+ $this->clear_echo();
+
+ // Show the panel once now; afterwards it is refreshed from the collect loop
+ $this->display_ui();
+ }
+ }
+ else
+ {
+ $this->daemonize();
+ }
+
+ // Install the signal handlers
+ $this->install_signal();
+
+ // Start collecting
+ $this->do_collect_page();
+
+ // Remove the current server from the server list
+ $this->del_server_list(self::$serverid);
+ }
+
+ /**
+  * Fork one child task process.
+  *
+  * The parent returns immediately. The child reconnects to the queue, collects
+  * pages until it has been idle for max_stand_by_time seconds, removes its task
+  * status and exits. A failed fork logs an error and exits the calling process.
+  *
+  * @param int $taskid id assigned to the child task
+  */
+ public function fork_one_task($taskid)
+ {
+     $pid = pcntl_fork();
+
+     // Parent: nothing to record for now (the child pid is currently unused)
+     if ($pid > 0)
+     {
+         //self::$taskpids[$taskid] = $pid;
+         return;
+     }
+
+     // Fork failed
+     if (0 !== $pid)
+     {
+         log::error("Fork children task({$taskid}) fail...");
+         exit;
+     }
+
+     // From here on we are inside the child
+     log::warn("Fork children task({$taskid}) successful...");
+
+     // Initialise the per-process state of the child
+     self::$time_start = microtime(true);
+     self::$taskid = $taskid;
+     self::$taskmaster = false;
+     self::$taskpid = posix_getpid();
+     self::$collect_succ = 0;
+     self::$collect_fail = 0;
+
+     queue::set_connect('default', self::$queue_config);
+     queue::init();
+
+     // Idle timer: keep polling for new work, one second at a time, and only
+     // give up once max_stand_by_time is reached without the timer being reset
+     self::$stand_by_time = 0;
+     while (self::$stand_by_time < self::$configs['max_stand_by_time'])
+     {
+         $this->do_collect_page();
+         log::warn('Task('.self::$taskid.') Stand By '.self::$stand_by_time.'/'.self::$configs['max_stand_by_time'].' s');
+         self::$stand_by_time++;
+         sleep(1);
+     }
+
+     $remaining = $this->queue_lsize();
+     log::warn('Task('.self::$taskid.') exit : queue_lsize = '.$remaining);
+     $this->del_task_status(self::$serverid, $taskid);
+
+     // Exit code 0 marks a normal child exit
+     exit(0);
+ }
+
+ /**
+ * Main collect loop: runs for as long as queue_lsize() reports pending links.
+ *
+ * The master task forks the child tasks once the queue holds more than
+ * tasknum + 2 links, publishes the current configuration through the queue
+ * store, collects a page and saves its task status. Child tasks reload the
+ * published configuration and collect a page only while the queue holds more
+ * than taskid + 2 links; otherwise they sleep for a second. Every iteration
+ * ends with a check_terminate() call.
+ */
+ public function do_collect_page()
+ {
+ while( $queue_lsize = $this->queue_lsize() )
+ {
+ // Master task
+ if (self::$taskmaster)
+ {
+ // Multitasking is enabled and the child tasks have not been forked yet
+ if (self::$tasknum > 1 && !self::$fork_task_complete)
+ {
+ // Once the queue holds more than tasknum + 2 links, fork the child tasks to collect together
+ if ($queue_lsize > self::$tasknum + 2)
+ {
+ self::$fork_task_complete = true;
+
+ // The redis connection fd must be dropped before forking, otherwise the processes fight over the same fd
+ queue::clear_link();
+ // Child task ids start at 2; id 1 is used by the master
+ for ($i = 2; $i <= self::$tasknum; $i++)
+ {
+ $this->fork_one_task($i);
+ }
+ }
+ }
+ // In the master: save the current configuration to the cache so the children can read configuration changes made at runtime (20180209)
+ if (self::$use_redis and ! empty(self::$configs))
+ {
+ queue::set('configs_'.self::$configs['name'], json_encode(self::$configs));
+ }
+ // Collect one page
+ $this->collect_page();
+ // Save the task status
+ $this->set_task_status();
+
+ // Refresh the panel after every collect_page() call
+ if (!log::$log_show && !self::$daemonize)
+ {
+ $this->display_ui();
+ }
+ }
+ // Child task
+ else
+ {
+ // A child only collects while the queue holds more than taskid + 2 links, otherwise it waits
+ // NOTE(review): the condition compares against taskid while the warning below prints tasknum - confirm which is intended
+ if ($queue_lsize > self::$taskid + 2)
+ {
+ // In the child: reload the latest configuration published by the master, so that configuration changes made by a long-running master are picked up (20180209)
+ if (self::$use_redis and ! empty(self::$configs))
+ {
+ if ($configs_active = queue::get('configs_'.self::$configs['name']))
+ {
+ self::$configs = json_decode($configs_active, true);
+ }
+ }
+ // Collect one page
+ $this->collect_page();
+ // Save the task status
+ $this->set_task_status();
+ }
+ else
+ {
+ log::warn('Task('.self::$taskid.') waiting...reason: queue_lsize = '.$queue_lsize.' < tasknum = '.self::$tasknum);
+ sleep(1);
+ }
+ }
+
+ // Check whether the process received a stop signal
+ $this->check_terminate();
+ }
+ }
+
+ /**
+ * Collect one page: pop a link from the queue, download it, run the callbacks,
+ * extract further URLs and (for content pages) the configured fields.
+ *
+ * Takes no arguments; the link to process comes from queue_rpop().
+ *
+ * @return mixed false when no link was available, the link was skipped or put
+ *               back (page/duration/per-host limits), the download returned
+ *               nothing, or is_anti_spider reported a block; no explicit
+ *               return value after a page was processed
+ * @author seatle
+ * @created time :2016-09-18 10:17
+ */
+ public function collect_page()
+ {
+ // Avoid unnecessary queue_lsize queries: the counters are only fetched when info logging is on (20180214)
+ if (isset(self::$configs['log_type']) and strstr(self::$configs['log_type'], 'info'))
+ {
+ $get_collect_url_num = $this->get_collect_url_num();
+ log::info('task id: '.self::$taskid." Find pages: {$get_collect_url_num} ");
+
+ $queue_lsize = $this->queue_lsize();
+ log::info('task id: '.self::$taskid." Waiting for collect pages: {$queue_lsize} ");
+
+ $get_collected_url_num = $this->get_collected_url_num();
+ log::info('task id: '.self::$taskid." Collected pages: {$get_collected_url_num} ");
+
+ // With multiple tasks, also log the id of the current task
+ if (self::$tasknum > 1)
+ {
+ log::info('Current task id: '.self::$taskid);
+ }
+ }
+ // Take the next link in order, first in first out (with queue_order = rand the order is random instead)
+ $link = $this->queue_rpop();
+
+ if (empty($link))
+ {
+ log::warn('Task('.self::$taskid.') Get Task link Fail...Stand By...');
+ return false;
+ }
+ $link = $this->link_uncompress($link);
+ if (empty($link['url']))
+ {
+ log::warn('Task('.self::$taskid.') Get Task url Fail...Stand By...');
+ return false;
+ }
+ self::$stand_by_time = 0; // a task was received, so the idle-exit timer is reset
+
+ $url = $link['url'];
+
+ // Limit the number of pages per domain (20180213)
+ if (isset(self::$configs['max_pages']) and self::$configs['max_pages'] > 0)
+ {
+ $domain_pages_num = $this->incr_pages_num($url);
+ if ($domain_pages_num > self::$configs['max_pages'])
+ {
+ log::debug('Task('.self::$taskid.') pages = '.$domain_pages_num.' more than '.self::$configs['max_pages'].", $url [Skip]");
+ return false;
+ }
+ }
+
+ // Limit the time spent per domain (20180213)
+ if (isset(self::$configs['max_duration']) and self::$configs['max_duration'] > 0)
+ {
+ $domain_duration = $this->get_duration_num($url);
+ if ($domain_duration > self::$configs['max_duration'])
+ {
+ log::debug('Task('.self::$taskid.') duration = '.$domain_duration.' more than '.self::$configs['max_duration'].", $url [Skip]");
+ return false;
+ }
+ }
+
+ // Per-host concurrency check (2018-5 BY KEN)
+ // NOTE(review): the counter is incremented here; the matching decrement is not visible in this method - confirm where it happens
+ if (self::$configs['max_task_per_host'] > 0)
+ {
+ $task_per_host = $this->get_task_per_host_num($url);
+ if ($task_per_host < self::$configs['max_task_per_host'])
+ {
+ $task_per_host = $this->incr_task_per_host($url);
+ }
+ else
+ {
+ log::warn('Task('.self::$taskid.') task_per_host = '.$task_per_host.' > '.self::$configs['max_task_per_host'].' ; URL: '.$url.' will be retry later...');
+ $this->queue_lpush($link); // put the link back into the queue
+ usleep(100000);
+ return false;
+ }
+ }
+
+ // Number of collected pages +1
+ $this->incr_collected_url_num($url);
+
+ // Start time of the download
+ $page_time_start = microtime(true);
+
+ // Run before the page is downloaded
+ // e.g. to decide, based on the URL, whether this request uses a proxy and which one
+ if ($this->on_before_download_page)
+ {
+ $return = call_user_func($this->on_before_download_page, $url, $link, $this);
+ if (isset($return)) $link = $return;
+ }
+
+ requests::$input_encoding = null;
+ $html = $this->request_url($url, $link);
+
+ // Record the download time of slow domains: only rounded durations above 1 second are added (20180213)
+ $time_run = round(microtime(true) - $page_time_start);
+ if ($time_run > 1)
+ {
+ $this->incr_duration_num($url, $time_run);
+ }
+
+ // Start time of the processing phase (after the download)
+ $page_time_start = microtime(true);
+
+ if (!$html)
+ {
+ return false;
+ }
+ // Object describing the page that is currently being processed
+ $page = array(
+ 'url' => $url,
+ 'raw' => $html,
+ 'request' => array(
+ 'url' => $url,
+ 'method' => $link['method'],
+ 'headers' => $link['headers'],
+ 'params' => $link['params'],
+ 'context_data' => $link['context_data'],
+ 'try_num' => $link['try_num'],
+ 'max_try' => $link['max_try'],
+ 'depth' => $link['depth'],
+ 'taskid' => self::$taskid,
+ ),
+ );
+ //printf("memory usage: %.2f M\n", memory_get_usage() / 1024 / 1024 );
+ unset($html);
+
+ //--------------------------------------------------------------------------------
+ // Callbacks
+ //--------------------------------------------------------------------------------
+
+ // Ask the developer-provided callback whether this page was served by an anti-spider mechanism
+ if ($this->is_anti_spider)
+ {
+ $is_anti_spider = call_user_func($this->is_anti_spider, $url, $page['raw'], $this);
+ // The callback detected anti-spider content and returned true
+ if ($is_anti_spider)
+ {
+ return false;
+ }
+ }
+
+ // Called after a page was downloaded, mainly to post-process the downloaded page,
+ // e.g. to add html tags to the body of the page
+ if ($this->on_download_page)
+ {
+ $return = call_user_func($this->on_download_page, $page, $this);
+ // For callbacks that forget to return: the page is only replaced when something was returned
+ if (isset($return))
+ {
+ $page = $return;
+ }
+ unset($return);
+ }
+
+ // Whether URLs are extracted from the current page
+ // A callback returning false means no further URLs are discovered on this page
+ $is_find_url = true;
+ if ($link['url_type'] == 'scan_page')
+ {
+ if ($this->on_scan_page)
+ {
+ $return = call_user_func($this->on_scan_page, $page, $page['raw'], $this);
+ if (isset($return))
+ {
+ $is_find_url = $return;
+ }
+
+ unset($return);
+ }
+ }
+ elseif ($link['url_type'] == 'content_page')
+ {
+ if ($this->on_content_page)
+ {
+ $return = call_user_func($this->on_content_page, $page, $page['raw'], $this);
+ if (isset($return))
+ {
+ $is_find_url = $return;
+ }
+ unset($return);
+ }
+ }
+ elseif ($link['url_type'] == 'list_page')
+ {
+ if ($this->on_list_page)
+ {
+ $return = call_user_func($this->on_list_page, $page, $page['raw'], $this);
+ if (isset($return))
+ {
+ $is_find_url = $return;
+ }
+ unset($return);
+ }
+ }
+
+ // on_scan_page, on_list_page and on_content_page returning false means no further URLs are discovered on this page
+ if ($is_find_url)
+ {
+ // Fetch the next level of URLs unless the maximum depth was reached (max_depth 0 means unlimited)
+ if (self::$configs['max_depth'] == 0 || $link['depth'] < self::$configs['max_depth'])
+ {
+ // Extract the URLs contained in the HTML page
+ $this->get_urls($page['raw'], $url, $link['depth'] + 1);
+ }
+ }
+
+ // For content pages, extract the fields from the HTML page
+ // List pages could provide data too (source_type: urlcontext), not implemented
+ if ($link['url_type'] == 'content_page')
+ {
+ $this->get_html_fields($page['raw'], $url, $page);
+ }
+
+ // Update the cached depth if the current depth is greater
+ $this->incr_depth_num($link['depth']);
+
+ // Time spent processing the page
+ $time_run = round(microtime(true) - $page_time_start, 3);
+ log::debug('task id: '.self::$taskid." Success process page {$url} in {$time_run} s");
+
+ $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start));
+ log::info('task id: '.self::$taskid." Spider running in {$spider_time_run}");
+
+ // Interval between two pages, in milliseconds
+ if (!isset(self::$configs['interval']))
+ {
+ // Sleep 100 milliseconds by default; crawling too fast may be treated as a DDoS
+ self::$configs['interval'] = 100;
+ }
+ usleep(self::$configs['interval'] * 1000);
+ }
+
+ /**
+  * Download a page and return its content.
+  *
+  * Configures the static requests client (encoding, timeout, user agent,
+  * client IP, proxy, headers), sends a GET or POST request, lets the
+  * on_status_code callback replace the body, fetches the target of a
+  * 301/302 response once, and re-queues or logs the other failures.
+  *
+  * @param mixed $url  URL to download
+  * @param mixed $link link options; the keys read here are proxy, headers,
+  *                    method, params, context_data, try_num and max_try
+  * @return mixed the downloaded content (with context_data appended when
+  *               present), or false when the download is considered failed
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function request_url($url, $link = array())
+ {
+     $time_start = microtime(true);
+
+     // When an input encoding is configured, do not let requests detect it.
+     if (isset(self::$configs['input_encoding']))
+     {
+         requests::$input_encoding = self::$configs['input_encoding'];
+     }
+     // Output is always converted to utf-8 (the original note: xpath only supports utf-8).
+     requests::$output_encoding = 'utf-8';
+     requests::set_timeout(self::$configs['timeout']);
+     requests::set_useragent(self::$configs['user_agent']);
+
+     // Drop any spoofed client IP first, then apply the configured one.
+     requests::del_client_ip();
+     if (!empty(self::$configs['client_ip']))
+     {
+         requests::set_client_ip(self::$configs['client_ip']);
+     }
+
+     // Drop the proxy first so the previous URL's proxy is not carried over.
+     requests::del_proxy();
+     // $link defaults to array(), so the key may be missing.
+     if (!empty($link['proxy']))
+     {
+         requests::set_proxy($link['proxy']);
+     }
+
+     // Per-link HTTP headers.
+     if (!empty($link['headers']))
+     {
+         foreach ($link['headers'] as $k=>$v)
+         {
+             requests::set_header($k, $v);
+         }
+     }
+
+     // Only "get" and "post" are allowed; anything else falls back to "get".
+     $method = isset($link['method']) ? trim(strtolower($link['method'])) : '';
+     $method = ($method == 'post') ? 'post' : 'get';
+     $params = empty($link['params']) ? array() : $link['params'];
+     $html = requests::$method($url, $params);
+     // Append the data attached to this link (e.g. list-page data needed by a content page).
+     if ($html && !empty($link['context_data']))
+     {
+         $html .= $link['context_data'];
+     }
+
+     $http_code = requests::$status_code;
+
+     // Request finished: decrement the per-host concurrency counter.
+     if (!empty(self::$configs['max_task_per_host']) && self::$configs['max_task_per_host'] > 0)
+     {
+         $this->incr_task_per_host($url, 'decr');
+     }
+
+     if ($this->on_status_code)
+     {
+         $return = call_user_func($this->on_status_code, $http_code, $url, $html, $this);
+         // A non-null return value replaces the body.
+         if (isset($return))
+         {
+             $html = $return;
+         }
+         unset($return);
+         if ( ! $html)
+         {
+             return false;
+         }
+     }
+
+     if ($http_code != 200)
+     {
+         // 301/302: fetch the redirect target directly.
+         if ($http_code == 301 || $http_code == 302)
+         {
+             $info = requests::$info;
+             if (empty($info['redirect_url']))
+             {
+                 return false;
+             }
+
+             $url = $info['redirect_url'];
+             requests::$input_encoding = null;
+             // Reuse the whitelisted $method/$params from above. Re-reading
+             // $link['method'] here would bypass the get/post restriction and
+             // let an arbitrary static method name be called on requests.
+             $html = requests::$method($url, $params);
+             // Deliberately not recursive: calling request_url() again could loop forever.
+             if ($html && !empty($link['context_data']))
+             {
+                 $html .= $link['context_data'];
+             }
+         }
+         else
+         {
+             // NOTE(review): queue_rpush() skips links whose URL is already
+             // marked as queued unless its second argument is true; confirm
+             // that the re-queued links below are actually accepted.
+             if ( ! empty(self::$configs['max_try']) and $http_code == 407)
+             {
+                 // Put the link back on the queue and keep collecting.
+                 $this->queue_rpush($link);
+                 log::error("Failed to download page {$url}");
+                 self::$collect_fail++;
+             }
+             elseif ( ! empty(self::$configs['max_try']) and in_array($http_code, array('0', '502', '503', '429')))
+             {
+                 // One more attempt has been used.
+                 $link['try_num'] = empty($link['try_num']) ? 1 : $link['try_num'] + 1;
+                 $max_try = isset($link['max_try']) ? $link['max_try'] : 0;
+                 // Re-queue while the attempt count is within the allowed maximum.
+                 if ( $link['try_num'] <= $max_try )
+                 {
+                     $this->queue_rpush($link);
+                 }
+                 log::error("Failed to download page {$url}, retry({$link['try_num']})");
+             }
+             else
+             {
+                 log::error("Failed to download page {$url}");
+                 self::$collect_fail++;
+             }
+             log::error("HTTP CODE: {$http_code}");
+             return false;
+         }
+     }
+
+     // Time spent downloading the page.
+     $time_run = round(microtime(true) - $time_start, 3);
+     log::debug("Success download page {$url} in {$time_run} s");
+     self::$collect_succ++;
+
+     return $html;
+ }
+
+ /**
+  * Extract the links of an HTML page and add them to the crawl queue.
+  *
+  * Links are selected with the XPath //a/@href, optionally filtered by
+  * sub-domain count, de-duplicated, resolved against the collecting URL
+  * with fill_url(), optionally limited per domain (max_pages), passed
+  * through the on_fetch_url callback and finally handed to add_url().
+  *
+  * @param mixed $html        HTML content
+  * @param mixed $collect_url URL the HTML was fetched from; used to resolve
+  *                           links and sent as the Referer of the new links
+  * @param mixed $depth       depth passed on to add_url()
+  * @return mixed false when no usable URL was found, otherwise nothing
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_urls($html, $collect_url, $depth = 0)
+ {
+     //--------------------------------------------------------------------------------
+     // Select the URLs of the page
+     //--------------------------------------------------------------------------------
+     $urls = selector::select($html, '//a/@href');
+
+     if (empty($urls))
+     {
+         return false;
+     }
+
+     // A single match is not returned as an array; wrap it so the loops below work.
+     if (!is_array($urls))
+     {
+         $urls = array($urls);
+     }
+
+     foreach ($urls as $key=>$url)
+     {
+         // Limit the number of sub-domains.
+         if ( ! empty(self::$configs['max_sub_num']))
+         {
+             // Drop the URL when the count exceeds the configured maximum.
+             $sub_domain_count = $this->sub_domain_count($url);
+             if ($sub_domain_count > self::$configs['max_sub_num'])
+             {
+                 unset($urls[$key]);
+                 log::debug('Task('.self::$taskid.') subdomin = '.$sub_domain_count.' more than '.self::$configs['max_sub_num'].",get_urls $url [Skip]");
+                 continue;
+             }
+         }
+         // Strips quote characters.
+         // NOTE(review): the third pair replaces '&' with '&', which is a
+         // no-op; it presumably was an HTML-entity replacement - confirm upstream.
+         $urls[$key] = str_replace(array('"', "'", '&'), array('', '', '&'), $url);
+     }
+
+     //--------------------------------------------------------------------------------
+     // Filter and complete the URLs
+     //--------------------------------------------------------------------------------
+     $urls = array_unique($urls);
+     foreach ($urls as $k=>$url)
+     {
+         $url = trim($url);
+         if (empty($url))
+         {
+             // Remove the entry; a bare "continue" would leave it in $urls
+             // and it would still be queued below.
+             unset($urls[$k]);
+             continue;
+         }
+
+         $val = $this->fill_url($url, $collect_url);
+         if ( ! $val)
+         {
+             unset($urls[$k]);
+             continue;
+         }
+
+         // Limit the number of URLs per domain.
+         if (isset(self::$configs['max_pages']) and self::$configs['max_pages'] > 0)
+         {
+             $domain_pages_num = $this->incr_pages_num($val);
+             if ($domain_pages_num > self::$configs['max_pages'])
+             {
+                 // Over the limit: the URL must not reach the queue at all.
+                 unset($urls[$k]);
+                 continue;
+             }
+         }
+
+         $urls[$k] = $val;
+     }
+
+     if (empty($urls))
+     {
+         return false;
+     }
+
+     //--------------------------------------------------------------------------------
+     // Put the URLs on the queue
+     //--------------------------------------------------------------------------------
+     foreach ($urls as $url)
+     {
+         if ($this->on_fetch_url)
+         {
+             $return = call_user_func($this->on_fetch_url, $url, $this);
+             $url = isset($return) ? $return : $url;
+             unset($return);
+
+             // A falsy result from on_fetch_url keeps the URL out of the queue.
+             if (!$url)
+             {
+                 continue;
+             }
+         }
+
+         // The current page becomes the Referer of the URLs found on it.
+         $options = array(
+             'headers' => array(
+                 'Referer' => $collect_url,
+             )
+         );
+         $this->add_url($url, $options, $depth);
+     }
+ }
+
+ /**
+ * 获得完整的连接地址
+ *
+ * @param mixed $url 要检查的URL
+ * @param mixed $collect_url 从那个URL页面得到上面的URL
+ * @return void
+ * @author seatle
+ * @created time :2016-09-23 17:13
+ */
+ public function fill_url($url, $collect_url)
+ {
+ $url = trim($url);
+ $collect_url = trim($collect_url);
+
+ // 排除JavaScript的连接
+ //if (strpos($url, "javascript:") !== false)
+ if (preg_match("@^(mailto|javascript:|#|'|\")@i", $url) || $url == '')
+ {
+ return false;
+ }
+ // 排除没有被解析成功的语言标签
+ if (substr($url, 0, 3) == '<%=' or substr($url, 0, 1) == '{' or substr($url, 0, 2) == ' {')
+ // if(substr($url, 0, 3) == '<%=')
+ {
+ return false;
+ }
+
+ $parse_url = @parse_url($collect_url);
+ if (empty($parse_url['scheme']) || empty($parse_url['host']))
+ {
+ return false;
+ }
+ // 过滤mailto、tel、sms、wechat、sinaweibo、weixin等协议
+ if ( ! in_array($parse_url['scheme'], array('http', 'https')))
+ {
+ return false;
+ }
+ $scheme = $parse_url['scheme'];
+ $domain = $parse_url['host'];
+ $path = empty($parse_url['path']) ? '' : $parse_url['path'];
+ $base_url_path = $domain.$path;
+ $base_url_path = preg_replace("/\/([^\/]*)\.(.*)$/", '/', $base_url_path);
+ $base_url_path = preg_replace("/\/$/", '', $base_url_path);
+ $i = $path_step = 0;
+ $dstr = $pstr = '';
+ $pos = strpos($url, '#');
+ if ($pos > 0)
+ {
+ // 去掉#和后面的字符串
+ $url = substr($url, 0, $pos);
+ }
+
+ // 修正url格式为 //www.jd.com/111.html 为正确的http
+ if (substr($url, 0, 2) == '//')
+ {
+ $url = preg_replace('/^\/\//iu', '', $url);
+ }
+ // /1234.html
+ elseif($url[0] == '/')
+ {
+ $url = $domain.$url;
+ }
+ // ./1234.html、../1234.html 这种类型的
+ elseif($url[0] == '.')
+ {
+ if(!isset($url[2]))
+ {
+ return false;
+ }
+ else
+ {
+ $urls = explode('/',$url);
+ foreach($urls as $u)
+ {
+ if( $u == '..' )
+ {
+ $path_step++;
+ }
+ // 遇到 ., 不知道为什么不直接写$u == '.', 貌似一样的
+ else if( $i < count($urls)-1 )
+ {
+ $dstr .= $urls[$i].'/';
+ }
+ else
+ {
+ $dstr .= $urls[$i];
+ }
+ $i++;
+ }
+ $urls = explode('/',$base_url_path);
+ if(count($urls) <= $path_step)
+ {
+ return false;
+ }
+ else
+ {
+ $pstr = '';
+ for($i=0;$i
+ * @created time :2016-11-05 18:58
+ */
+ /**
+  * Shrink a link array by removing the entries that carry no information.
+  *
+  * @param mixed $link link array
+  * @return mixed the link without its empty entries and without a "get" method
+  */
+ public function link_compress($link)
+ {
+     // Entries that are removed whenever their value is empty.
+     $droppable = array('url_type', 'headers', 'params', 'context_data', 'proxy', 'try_num', 'max_try', 'depth');
+     foreach ($droppable as $field)
+     {
+         if (empty($link[$field]))
+         {
+             unset($link[$field]);
+         }
+     }
+
+     // A missing method or "get" (in any letter case) is not stored either.
+     if (empty($link['method']) || strtolower($link['method']) == 'get')
+     {
+         unset($link['method']);
+     }
+
+     return $link;
+ }
+
+ /**
+  * Expand a link array so that every known key is present.
+  *
+  * Missing (or null) entries get a fixed default; proxy and max_try fall
+  * back to the values in self::$configs.
+  *
+  * @param mixed $link link array, possibly with missing keys
+  * @return mixed array with the keys url, url_type, method, headers, params,
+  *               context_data, proxy, try_num, max_try and depth
+  * @author seatle
+  * @created time :2016-11-05 18:58
+  */
+ public function link_uncompress($link)
+ {
+     $restored = array();
+
+     // Entries with a constant fallback, in output order.
+     $fallbacks = array(
+         'url'          => '',
+         'url_type'     => '',
+         'method'       => 'get',
+         'headers'      => array(),
+         'params'       => array(),
+         'context_data' => '',
+     );
+     foreach ($fallbacks as $field => $fallback)
+     {
+         $restored[$field] = isset($link[$field]) ? $link[$field] : $fallback;
+     }
+
+     // The configured proxy is only read when the link has none.
+     if (isset($link['proxy']))
+     {
+         $restored['proxy'] = $link['proxy'];
+     }
+     else
+     {
+         $restored['proxy'] = self::$configs['proxy'];
+     }
+
+     $restored['try_num'] = isset($link['try_num']) ? $link['try_num'] : 0;
+
+     // The configured max_try is only read when the link has none.
+     if (isset($link['max_try']))
+     {
+         $restored['max_try'] = $link['max_try'];
+     }
+     else
+     {
+         $restored['max_try'] = self::$configs['max_try'];
+     }
+
+     $restored['depth'] = isset($link['depth']) ? $link['depth'] : 0;
+
+     return $restored;
+ }
+
+ /**
+  * Extract the configured fields from a page, log them and export them.
+  *
+  * @param mixed $html page content
+  * @param mixed $url  page URL
+  * @param mixed $page page data passed on to get_fields() and on_extract_page
+  * @return mixed false when on_extract_page returns false, otherwise nothing
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_html_fields($html, $url, $page)
+ {
+     $fields = $this->get_fields(self::$configs['fields'], $html, $url, $page);
+     if (empty($fields))
+     {
+         return;
+     }
+
+     if ($this->on_extract_page)
+     {
+         $result = call_user_func($this->on_extract_page, $page, $fields);
+         if (!isset($result))
+         {
+             log::warn("on_extract_page return value can't be empty");
+         }
+         elseif ($result === false)
+         {
+             // false skips the page: nothing is logged or exported.
+             return false;
+         }
+         elseif (!is_array($result))
+         {
+             log::warn('on_extract_page return value must be an array');
+         }
+         else
+         {
+             $fields = $result;
+         }
+     }
+
+     if (!isset($fields) || !is_array($fields))
+     {
+         return;
+     }
+
+     // Stop the process once the configured number of results is exceeded.
+     $fields_num = $this->incr_fields_num();
+     if (self::$configs['max_fields'] != 0 && $fields_num > self::$configs['max_fields'])
+     {
+         exit(0);
+     }
+
+     if (version_compare(PHP_VERSION,'5.4.0','<'))
+     {
+         // No JSON_UNESCAPED_UNICODE before 5.4: decode the \uXXXX escapes by hand.
+         $fields_str = preg_replace_callback("#\\\u([0-9a-f]{4})#i", function ($m)
+         {
+             return @iconv('UCS-2BE', 'UTF-8', pack('H4', $m[1]));
+         }, json_encode($fields));
+     }
+     else
+     {
+         $fields_str = json_encode($fields, JSON_UNESCAPED_UNICODE);
+     }
+
+     if (util::is_win())
+     {
+         $fields_str = mb_convert_encoding($fields_str, 'gb2312', 'utf-8');
+     }
+     log::info("Result[{$fields_num}]: ".$fields_str);
+
+     // Nothing more to do without an export configuration.
+     if (empty(self::$configs['export']))
+     {
+         return;
+     }
+
+     self::$export_type = isset(self::$configs['export']['type']) ? self::$configs['export']['type'] : '';
+     switch (self::$export_type)
+     {
+         case 'csv':
+             util::put_file(self::$export_file, util::format_csv($fields)."\n", FILE_APPEND);
+             break;
+         case 'sql':
+             $sql = db::insert(self::$export_table, $fields, true);
+             util::put_file(self::$export_file, $sql.";\n", FILE_APPEND);
+             break;
+         case 'db':
+             db::insert(self::$export_table, $fields);
+             break;
+     }
+ }
+
+ /**
+  * Extract fields from an HTML fragment according to a field configuration.
+  *
+  * Each configuration entry is evaluated with its selector (xpath by
+  * default, css or regex); entries with "children" are evaluated
+  * recursively on every extracted fragment. Afterwards the on_handle_img
+  * and on_extract_field callbacks may replace the extracted values.
+  *
+  * @param mixed $confs field configuration entries
+  * @param mixed $html  content to extract from
+  * @param mixed $url   URL of the page, used to resolve attached URLs
+  * @param mixed $page  page data passed to on_extract_field
+  * @return mixed array of field name => value; empty when a required field is missing
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_fields($confs, $html, $url, $page)
+ {
+     $fields = array();
+     foreach ($confs as $conf)
+     {
+         // Whether the field may hold several values.
+         $repeated = isset($conf['repeated']) && $conf['repeated'] ? true : false;
+         // Whether the field must have a value.
+         $required = isset($conf['required']) && $conf['required'] ? true : false;
+
+         if (empty($conf['name']))
+         {
+             log::error("The field name is null, please check your \"fields\" and add the name of the field\n");
+             exit;
+         }
+
+         $values = NULL;
+         // Only entries with an extraction rule are evaluated.
+         if (!empty($conf['selector']))
+         {
+             // The field is read from a page linked by a previously extracted field.
+             if (isset($conf['source_type']) && $conf['source_type']=='attached_url')
+             {
+                 // The attached page is downloaded directly, without going through the queue.
+                 if (!empty($fields[$conf['attached_url']]))
+                 {
+                     $collect_url = $this->fill_url($fields[$conf['attached_url']], $url);
+                     log::debug("Find attached content page: {$collect_url}");
+                     // Build a fresh link each time instead of reusing the
+                     // $link left over from an earlier iteration.
+                     $link = $this->link_uncompress(array('url' => $collect_url));
+                     requests::$input_encoding = null;
+                     // $html is replaced here, so the remaining entries of
+                     // this loop are extracted from the attached page.
+                     $html = $this->request_url($collect_url, $link);
+                     // Called after the attached page has been downloaded.
+                     if ($this->on_download_attached_page)
+                     {
+                         $return = call_user_func($this->on_download_attached_page, $html, $this);
+                         if (isset($return))
+                         {
+                             $html = $return;
+                         }
+                     }
+
+                     // The link field itself is removed once its page has been fetched.
+                     unset($fields[$conf['attached_url']]);
+                 }
+             }
+
+             // No selector type, or xpath.
+             if (!isset($conf['selector_type']) || $conf['selector_type']=='xpath')
+             {
+                 $values = $this->get_fields_xpath($html, $conf['selector'], $conf['name']);
+             }
+             elseif ($conf['selector_type']=='css')
+             {
+                 $values = $this->get_fields_css($html, $conf['selector'], $conf['name']);
+             }
+             elseif ($conf['selector_type']=='regex')
+             {
+                 $values = $this->get_fields_regex($html, $conf['selector'], $conf['name']);
+             }
+
+             // A value was found and the entry has child entries.
+             if (isset($values) && !empty($conf['children']))
+             {
+                 // Wrap a single result so the loop below handles both cases.
+                 if (!is_array($values))
+                 {
+                     $values = array($values);
+                 }
+                 $child_values = array();
+                 // Each fragment extracted by the parent is the input of the children.
+                 foreach ($values as $child_html)
+                 {
+                     $child_value = $this->get_fields($conf['children'], $child_html, $url, $page);
+                     if (!empty($child_value))
+                     {
+                         $child_values[] = $child_value;
+                     }
+                 }
+                 // Keep the child results when there are any, otherwise the fragments.
+                 if (!empty($child_values))
+                 {
+                     $values = $child_values;
+                 }
+             }
+         }
+
+         if (!isset($values))
+         {
+             // A missing required field discards everything extracted so far.
+             if ($required)
+             {
+                 log::warn("Selector {$conf['name']}[{$conf['selector']}] not found, It's a must");
+                 $fields = array();
+                 break;
+             }
+             // An empty string (not an array) keeps attached_url concatenation working.
+             $fields[$conf['name']] = '';
+         }
+         else
+         {
+             if (is_array($values))
+             {
+                 // Non-repeated fields keep only the first element.
+                 if ($repeated)
+                 {
+                     $fields[$conf['name']] = $values;
+                 }
+                 else
+                 {
+                     $fields[$conf['name']] = $values[0];
+                 }
+             }
+             else
+             {
+                 $fields[$conf['name']] = $values;
+             }
+         }
+     }
+
+     if (!empty($fields))
+     {
+         foreach ($fields as $fieldname => $data)
+         {
+             // NOTE(review): this pattern literal spans two lines and looks
+             // truncated in this copy (it is meant to detect img tags) -
+             // confirm against the upstream source. Kept byte-for-byte.
+             $pattern = "/
\r\n\t]{1,}/isu";
+             // Called when the extracted content contains img tags. Only
+             // strings are tested: preg_match() does not accept an array subject.
+             if ($this->on_handle_img && is_string($data) && preg_match($pattern, $data))
+             {
+                 $return = call_user_func($this->on_handle_img, $fieldname, $data);
+                 if (!isset($return))
+                 {
+                     log::warn("on_handle_img return value can't be empty\n");
+                 }
+                 else
+                 {
+                     $data = $return;
+                     // Store the result; otherwise it is lost whenever
+                     // on_extract_field is not set.
+                     $fields[$fieldname] = $data;
+                 }
+             }
+
+             // Called for every extracted field to post-process its content.
+             if ($this->on_extract_field)
+             {
+                 $return = call_user_func($this->on_extract_field, $fieldname, $data, $page);
+                 if (!isset($return))
+                 {
+                     log::warn("on_extract_field return value can't be empty\n");
+                 }
+                 else
+                 {
+                     $fields[$fieldname] = $return;
+                 }
+             }
+         }
+     }
+
+     return $fields;
+ }
+
+ /**
+  * Validate the export configuration and terminate the script when it is unusable.
+  *
+  * csv and sql exports need an export file; db export needs the mysqli
+  * extension, a connection configuration that can actually connect, and
+  * an existing export table.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2016-10-02 23:37
+  */
+ public function check_export()
+ {
+     // Nothing to check without an export configuration.
+     if (!empty(self::$configs['export']))
+     {
+         if (self::$export_type == 'csv')
+         {
+             if (empty(self::$export_file))
+             {
+                 log::error('Export data into CSV files need to Set the file path.');
+                 exit;
+             }
+         }
+         elseif (self::$export_type == 'sql')
+         {
+             if (empty(self::$export_file))
+             {
+                 log::error('Export data into SQL files need to Set the file path.');
+                 exit;
+             }
+         }
+         elseif (self::$export_type == 'db')
+         {
+             if (!function_exists('mysqli_connect'))
+             {
+                 log::error('Export data to a database need Mysql support, unable to load mysqli extension.');
+                 exit;
+             }
+
+             if (empty(self::$db_config))
+             {
+                 log::error('Export data to a database need Mysql support, you have not set a config array for connect.');
+                 exit;
+             }
+
+             // Probe connection: only used to verify the settings.
+             $config = self::$db_config;
+             $probe = @mysqli_connect($config['host'], $config['user'], $config['pass'], $config['name'], $config['port']);
+             if(mysqli_connect_errno())
+             {
+                 log::error('Export data to a database need Mysql support, '.mysqli_connect_error());
+                 exit;
+             }
+             // Release the probe; it is not used again after this check.
+             if ($probe)
+             {
+                 mysqli_close($probe);
+             }
+
+             db::set_connect('default', $config);
+             db::_init();
+
+             if (!db::table_exists(self::$export_table))
+             {
+                 log::error('Table '.self::$export_table.' does not exist');
+                 exit;
+             }
+         }
+     }
+ }
+
+ /**
+  * Ask on the console whether data already present in Redis should be kept.
+  *
+  * Answering "n" flushes the whole Redis database; any other answer keeps
+  * it. Nothing is asked when Redis is not used, when the running state is
+  * saved, or when the database is empty.
+  *
+  * @return mixed false when the check is skipped, otherwise nothing
+  */
+ public function check_cache()
+ {
+     if ( !self::$use_redis || self::$save_running_state)
+     {
+         return false;
+     }
+
+     // The whole database is inspected (and flushed), so it must not be
+     // shared with other projects.
+     $count = queue::dbsize();
+     if ( ! ($count > 0))
+     {
+         return;
+     }
+
+     $msg = "Found that the data of Redis, no continue will empty Redis data start again\n";
+     $msg .= 'Do you want to continue? [Y/n]';
+     fwrite(STDOUT, $msg);
+
+     // Empty or unrecognised input counts as "yes"; only "n" clears the data.
+     $answer = strtolower(trim(fgets(STDIN)));
+     if ($answer !== 'n')
+     {
+         return;
+     }
+
+     log::warn('Clear redis data...');
+     queue::flushdb();
+ }
+
+ /**
+  * Register this server in Redis and clear the status of all its tasks.
+  *
+  * @return mixed false when Redis is not used, otherwise nothing
+  */
+ public function init_redis()
+ {
+     if (!self::$use_redis)
+     {
+         return false;
+     }
+
+     // Add the current server to the server list.
+     $this->add_server_list(self::$serverid, self::$tasknum);
+
+     // Remove leftover task status entries of this server, which matters
+     // for processes that were killed.
+     $taskid = 1;
+     while ($taskid <= self::$tasknum)
+     {
+         $this->del_task_status(self::$serverid, $taskid);
+         $taskid++;
+     }
+ }
+
+ /**
+  * Publish the status of the current task.
+  *
+  * The status (task id, pid, memory in MB, success/failure counters and
+  * pages per second) is JSON encoded and stored in Redis, or kept in
+  * self::$task_status when Redis is not used.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public function set_task_status()
+ {
+     $mem_mb = round(memory_get_usage(true)/(1024*1024),2);
+     $elapsed = microtime(true) - self::$time_start;
+     $processed = self::$collect_succ + self::$collect_fail;
+
+     $task_status = json_encode(array(
+         'id' => self::$taskid,
+         'pid' => self::$taskpid,
+         'mem' => $mem_mb,
+         'collect_succ' => self::$collect_succ,
+         'collect_fail' => self::$collect_fail,
+         'speed' => round($processed / $elapsed, 2),
+     ));
+
+     if (!self::$use_redis)
+     {
+         self::$task_status = array($task_status);
+         return;
+     }
+
+     queue::set('server-'.self::$serverid.'-task_status-'.self::$taskid, $task_status);
+ }
+
+ /**
+  * Delete the stored status of one task.
+  *
+  * @param mixed $serverid server id
+  * @param mixed $taskid   task id
+  * @return mixed false when Redis is not used, otherwise nothing
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function del_task_status($serverid, $taskid)
+ {
+     if (self::$use_redis)
+     {
+         queue::del("server-{$serverid}-task_status-{$taskid}");
+         return;
+     }
+
+     return false;
+ }
+
+ /**
+  * Read the stored status of one task.
+  *
+  * @param mixed $serverid server id
+  * @param mixed $taskid   task id
+  * @return mixed value stored under the task's status key, or false when Redis is not used
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public function get_task_status($serverid, $taskid)
+ {
+     return self::$use_redis
+         ? queue::get("server-{$serverid}-task_status-{$taskid}")
+         : false;
+ }
+
+ /**
+  * Read the stored status of every task of a server.
+  *
+  * With Redis, the status keys of tasks 1..$tasknum are read one by one;
+  * without Redis, self::$task_status is returned.
+  *
+  * @param mixed $serverid server id
+  * @param mixed $tasknum  number of tasks of that server
+  * @return mixed list of task status values
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public function get_task_status_list($serverid = 1, $tasknum = 0)
+ {
+     // $tasknum has a default because an optional parameter followed by a
+     // required one is deprecated since PHP 8.0; existing two-argument
+     // calls behave as before.
+     $task_status = array();
+     if (self::$use_redis)
+     {
+         for ($i = 1; $i <= $tasknum; $i++)
+         {
+             $key = "server-{$serverid}-task_status-".$i;
+             $task_status[] = queue::get($key);
+         }
+     }
+     else
+     {
+         $task_status = self::$task_status;
+     }
+     return $task_status;
+ }
+
+ /**
+  * Add or refresh a server entry in the server list stored in Redis.
+  *
+  * @param mixed $serverid server id, used as the key of the entry
+  * @param mixed $tasknum  number of tasks of that server
+  * @return mixed false when Redis is not used, otherwise nothing
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function add_server_list($serverid, $tasknum)
+ {
+     if (!self::$use_redis)
+     {
+         return false;
+     }
+
+     // Start from the stored list when there is one, otherwise from scratch.
+     $stored = queue::get('server_list');
+     $servers = $stored ? json_decode($stored, true) : array();
+
+     $servers[$serverid] = array(
+         'serverid' => $serverid,
+         'tasknum' => $tasknum,
+         'time' => time(),
+     );
+     // Keep the list ordered by server id (no effect on a single entry).
+     ksort($servers);
+
+     queue::set('server_list', json_encode($servers));
+ }
+
+ /**
+  * Remove a server entry from the server list stored in Redis.
+  *
+  * The stored list is only rewritten when entries remain after the removal.
+  *
+  * @param mixed $serverid server id
+  * @return mixed false when Redis is not used, otherwise nothing
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function del_server_list($serverid)
+ {
+     if (!self::$use_redis)
+     {
+         return false;
+     }
+
+     $stored = queue::get('server_list');
+     if ( ! $stored)
+     {
+         return;
+     }
+
+     $servers = json_decode($stored, true);
+     if (isset($servers[$serverid]))
+     {
+         unset($servers[$serverid]);
+     }
+
+     // An empty result is not written back.
+     if (empty($servers))
+     {
+         return;
+     }
+
+     ksort($servers);
+     queue::set('server_list', json_encode($servers));
+ }
+
+ /**
+  * Return the counter of URLs that have been queued for collection.
+  *
+  * @return mixed the Redis value "collect_urls_num", or self::$collect_urls_num without Redis
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_collect_url_num()
+ {
+     return self::$use_redis
+         ? queue::get('collect_urls_num')
+         : self::$collect_urls_num;
+ }
+
+ /**
+  * Return the counter of URLs that have already been collected.
+  *
+  * @return mixed the Redis value "collected_urls_num", or self::$collected_urls_num without Redis
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_collected_url_num()
+ {
+     return self::$use_redis
+         ? queue::get('collected_urls_num')
+         : self::$collected_urls_num;
+ }
+
+ /**
+  * Increase the counter of collected URLs by one.
+  *
+  * @param mixed $url not used by this method
+  * @return void
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function incr_collected_url_num($url)
+ {
+     if (!self::$use_redis)
+     {
+         self::$collected_urls_num++;
+         return;
+     }
+
+     queue::incr('collected_urls_num');
+ }
+
+ /**
+  * Push a link onto the "left" end of the collect queue.
+  *
+  * With Redis the link is JSON encoded and added with lpush (or sadd when
+  * queue_order is "rand"); without Redis it is appended to the end of
+  * self::$collect_queue, the end queue_lpop() reads from. A URL that is
+  * already marked as queued is skipped unless $allowed_repeat is true.
+  *
+  * @param mixed $link           link array; must contain a non-empty "url"
+  * @param mixed $allowed_repeat whether an already queued URL may be queued again
+  * @return mixed true when the link was queued, false otherwise
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_lpush($link = array(), $allowed_repeat = false)
+ {
+     if (empty($link) || empty($link['url']))
+     {
+         return false;
+     }
+
+     $url = $link['url'];
+     $link = $this->link_compress($link);
+
+     $status = false;
+     if (self::$use_redis)
+     {
+         $key = 'collect_urls-'.md5($url);
+         $lock = 'lock-'.$key;
+         // Lock so that processes handle the same URL one at a time.
+         if (queue::lock($lock))
+         {
+             $exists = queue::exists($key);
+             // Not queued yet, or repeats are allowed.
+             if (!$exists || $allowed_repeat)
+             {
+                 // One more URL waiting to be collected.
+                 queue::incr('collect_urls_num');
+                 // Mark the URL as queued.
+                 queue::set($key, time());
+                 $link = json_encode($link);
+                 // Random order uses a set, sequential order a list.
+                 if (self::$configs['queue_order'] == 'rand')
+                 {
+                     queue::sadd('collect_queue', $link);
+                 }
+                 else
+                 {
+                     queue::lpush('collect_queue', $link);
+                 }
+                 $status = true;
+             }
+             queue::unlock($lock);
+         }
+     }
+     else
+     {
+         $key = md5($url);
+         // Honour $allowed_repeat here as well, like the Redis branch does.
+         if ($allowed_repeat || !array_key_exists($key, self::$collect_urls))
+         {
+             self::$collect_urls_num++;
+             self::$collect_urls[$key] = time();
+             array_push(self::$collect_queue, $link);
+             $status = true;
+         }
+     }
+     return $status;
+ }
+
+ /**
+  * Push a link onto the "right" end of the collect queue.
+  *
+  * With Redis the link is JSON encoded and added with rpush (or sadd when
+  * queue_order is "rand"); without Redis it is prepended to
+  * self::$collect_queue, the end queue_rpop() reads from. A URL that is
+  * already marked as queued is skipped unless $allowed_repeat is true.
+  *
+  * @param mixed $link           link array; must contain a non-empty "url"
+  * @param mixed $allowed_repeat whether an already queued URL may be queued again
+  * @return mixed true when the link was queued, false otherwise
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_rpush($link = array(), $allowed_repeat = false)
+ {
+     if (empty($link) || empty($link['url']))
+     {
+         return false;
+     }
+
+     $url = $link['url'];
+
+     $status = false;
+     if (self::$use_redis)
+     {
+         $key = 'collect_urls-'.md5($url);
+         $lock = 'lock-'.$key;
+         // Lock so that processes handle the same URL one at a time.
+         if (queue::lock($lock))
+         {
+             $exists = queue::exists($key);
+             // Not queued yet, or repeats are allowed.
+             if ( ! $exists || $allowed_repeat)
+             {
+                 // One more URL waiting to be collected.
+                 queue::incr('collect_urls_num');
+                 // Mark the URL as queued.
+                 queue::set($key, time());
+                 $link = json_encode($link);
+                 // Random order uses a set, sequential order a list.
+                 if (self::$configs['queue_order'] == 'rand')
+                 {
+                     queue::sadd('collect_queue', $link);
+                 }
+                 else
+                 {
+                     queue::rpush('collect_queue', $link);
+                 }
+                 $status = true;
+             }
+             queue::unlock($lock);
+         }
+     }
+     else
+     {
+         $key = md5($url);
+         // Honour $allowed_repeat here as well, like the Redis branch does.
+         if ($allowed_repeat || !array_key_exists($key, self::$collect_urls))
+         {
+             self::$collect_urls_num++;
+             self::$collect_urls[$key] = time();
+             array_unshift(self::$collect_queue, $link);
+             $status = true;
+         }
+     }
+     return $status;
+ }
+
+ /**
+  * Take a link from the "left" end of the collect queue.
+  *
+  * With Redis the entry comes from lpop (or spop when queue_order is
+  * "rand") and is JSON decoded; without Redis the last element of
+  * self::$collect_queue is removed and returned. According to the original
+  * notes this last-in-first-out order keeps paginated content pages
+  * together and lets list pages be collected in order.
+  *
+  * @return mixed the link, as stored in the queue
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_lpop()
+ {
+     if (!self::$use_redis)
+     {
+         return array_pop(self::$collect_queue);
+     }
+
+     // Random order reads from a set, sequential order from a list.
+     $raw = (self::$configs['queue_order'] == 'rand')
+         ? queue::spop('collect_queue')
+         : queue::lpop('collect_queue');
+
+     return json_decode($raw, true);
+ }
+
+ /**
+  * Take a link from the "right" end of the collect queue.
+  *
+  * With Redis the entry comes from rpop (or spop when queue_order is
+  * "rand") and is JSON decoded; without Redis the first element of
+  * self::$collect_queue is removed and returned.
+  *
+  * @return mixed the link, as stored in the queue
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_rpop()
+ {
+     if (!self::$use_redis)
+     {
+         return array_shift(self::$collect_queue);
+     }
+
+     // Random order reads from a set, sequential order from a list.
+     $raw = (self::$configs['queue_order'] == 'rand')
+         ? queue::spop('collect_queue')
+         : queue::rpop('collect_queue');
+
+     return json_decode($raw, true);
+ }
+
+ /**
+  * Return the number of links waiting in the collect queue.
+  *
+  * @return mixed scard/lsize of "collect_queue" with Redis, otherwise the
+  *               element count of self::$collect_queue
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function queue_lsize()
+ {
+     if (!self::$use_redis)
+     {
+         return count(self::$collect_queue);
+     }
+
+     // Random order stores the queue as a set, sequential order as a list.
+     return (self::$configs['queue_order'] == 'rand')
+         ? queue::scard('collect_queue')
+         : queue::lsize('collect_queue');
+ }
+
+ /**
+  * Raise the recorded crawl depth to $depth when $depth is larger.
+  *
+  * @param mixed $depth depth of the page that has just been processed
+  * @return void
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function incr_depth_num($depth)
+ {
+     if (!self::$use_redis)
+     {
+         if (self::$depth_num < $depth)
+         {
+             self::$depth_num = $depth;
+         }
+         return;
+     }
+
+     $lock = 'lock-depth_num';
+     // The original note says the lock lasts 2 seconds; without the lock
+     // nothing is updated.
+     if (!queue::lock($lock, time(), 2))
+     {
+         return;
+     }
+
+     if (queue::get('depth_num') < $depth)
+     {
+         queue::set('depth_num', $depth);
+     }
+
+     queue::unlock($lock);
+ }
+
+ /**
+  * Return the recorded crawl depth.
+  *
+  * @return mixed the Redis value "depth_num" (0 when it is empty), or
+  *               self::$depth_num without Redis
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_depth_num()
+ {
+     if (!self::$use_redis)
+     {
+         return self::$depth_num;
+     }
+
+     $stored = queue::get('depth_num');
+     if ($stored)
+     {
+         return $stored;
+     }
+     return 0;
+ }
+
+ /**
+  * Increase the counter of extracted results by one.
+  *
+  * @return mixed the counter value after the increment
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function incr_fields_num()
+ {
+     if (self::$use_redis)
+     {
+         return queue::incr('fields_num');
+     }
+
+     return ++self::$fields_num;
+ }
+
+ /**
+  * Return the counter of extracted results.
+  *
+  * @return mixed the counter value, or 0 when it is empty
+  * @author seatle
+  * @created time :2016-09-23 17:13
+  */
+ public function get_fields_num()
+ {
+     $current = self::$use_redis ? queue::get('fields_num') : self::$fields_num;
+
+     if ($current)
+     {
+         return $current;
+     }
+     return 0;
+ }
+
+ /**
+  * Increase the page counter of a URL's domain by one.
+  *
+  * The counter key is the value returned by getRootDomain($url, 'host'),
+  * or "all" when no URL is given or no domain can be determined. The
+  * original notes say the counter limits the pages collected per domain.
+  *
+  * @param mixed $url URL whose domain is counted
+  * @return mixed the counter value after the increment
+  * @author KEN
+  * @created time :2018-05
+  */
+ public function incr_pages_num($url = '')
+ {
+     $domain = empty($url) ? '' : $this->getRootDomain($url, 'host');
+     if (empty($domain))
+     {
+         $domain = 'all';
+     }
+
+     if (self::$use_redis)
+     {
+         return queue::incr('pages_num:'.$domain);
+     }
+
+     // The first page of a domain starts its counter at 1.
+     if (empty(self::$pages_num[$domain]))
+     {
+         self::$pages_num[$domain] = 1;
+     }
+     else
+     {
+         self::$pages_num[$domain]++;
+     }
+
+     return self::$pages_num[$domain];
+ }
+
+ /**
+  * Add $time_run to the duration counter of a URL's domain.
+  *
+  * The counter key is the value returned by getRootDomain($url), or "all"
+  * when no URL is given or no domain can be determined. The original notes
+  * describe it as the accumulated time of slow (over one second)
+  * collections, used to limit the total collection time per domain.
+  *
+  * @param mixed $url      URL whose domain is counted
+  * @param mixed $time_run amount to add
+  * @return mixed the counter value after the addition
+  * @author KEN
+  * @created time :2018-05
+  */
+ public function incr_duration_num($url = '', $time_run = 1)
+ {
+     $domain = empty($url) ? '' : $this->getRootDomain($url);
+     if (empty($domain))
+     {
+         $domain = 'all';
+     }
+
+     if (self::$use_redis)
+     {
+         return queue::incr('duration:'.$domain, $time_run);
+     }
+
+     // An empty counter starts at $time_run.
+     if (empty(self::$duration[$domain]))
+     {
+         self::$duration[$domain] = $time_run;
+     }
+     else
+     {
+         self::$duration[$domain] += $time_run;
+     }
+
+     return self::$duration[$domain];
+ }
+
+ /**
+  * Return the duration counter of a URL's domain.
+  *
+  * The counter key is the value returned by getRootDomain($url), or "all"
+  * when no URL is given or no domain can be determined.
+  *
+  * @param mixed $url URL whose domain is looked up
+  * @return mixed the counter value, or 0 when it is empty
+  * @author KEN
+  * @created time :2018-04
+  */
+ public function get_duration_num($url = '')
+ {
+     $domain = empty($url) ? '' : $this->getRootDomain($url);
+     if (empty($domain))
+     {
+         $domain = 'all';
+     }
+
+     if (self::$use_redis)
+     {
+         $total = queue::get('duration:'.$domain);
+     }
+     else
+     {
+         $total = ! empty(self::$duration[$domain]) ? self::$duration[$domain] : 0;
+     }
+
+     if ($total)
+     {
+         return $total;
+     }
+     return 0;
+ }
+
+ /**
+  * Increase or decrease the concurrency counter of a URL's host.
+  *
+  * The counter key is the value returned by getRootDomain($url, 'host').
+  *
+  * @param mixed $url  URL whose host is counted
+  * @param mixed $type "decr" decrements; any other value increments
+  * @return mixed the counter value after the change, or false when the URL
+  *               is empty or no host can be determined
+  * @author KEN
+  * @created time :2018-05-28 16:40
+  */
+ public function incr_task_per_host($url = '', $type = 'incr')
+ {
+     if (empty($url))
+     {
+         return false;
+     }
+     $domain = $this->getRootDomain($url, 'host');
+     if (empty($domain))
+     {
+         return false;
+     }
+
+     if (self::$use_redis)
+     {
+         if ($type == 'decr')
+         {
+             return queue::decr('task_per_host:'.$domain);
+         }
+         return queue::incr('task_per_host:'.$domain);
+     }
+
+     // Start missing counters at zero, then apply the requested change.
+     // Previously a "decr" on an empty counter set it to 1, i.e. counted
+     // a finished request as a new running one.
+     if (!isset(self::$task_per_host_counter[$domain]))
+     {
+         self::$task_per_host_counter[$domain] = 0;
+     }
+
+     if ($type == 'decr')
+     {
+         self::$task_per_host_counter[$domain]--;
+     }
+     else
+     {
+         self::$task_per_host_counter[$domain]++;
+     }
+
+     return self::$task_per_host_counter[$domain];
+ }
+
+ /**
+  * Return the current concurrency counter of a URL's host.
+  *
+  * @param mixed $url URL whose host is looked up
+  * @return mixed the counter value; 0 when the URL is empty, no host can be
+  *               determined, or (without Redis) the host has no counter yet
+  * @author KEN
+  */
+ public function get_task_per_host_num($url)
+ {
+     if (empty($url))
+     {
+         return 0;
+     }
+     $domain = $this->getRootDomain($url, 'host');
+     if (empty($domain))
+     {
+         return 0;
+     }
+     if (self::$use_redis)
+     {
+         $count = queue::get('task_per_host:'.$domain);
+     }
+     else
+     {
+         // A host without a counter has no running request; reading the
+         // missing index directly raised an undefined-index notice.
+         $count = isset(self::$task_per_host_counter[$domain]) ? self::$task_per_host_counter[$domain] : 0;
+     }
+     return $count;
+ }
+
+ /**
+  * Extract a field with an XPath selector.
+  *
+  * @param mixed $html      content to extract from
+  * @param mixed $selector  XPath expression
+  * @param mixed $fieldname field name, only used in the error log
+  * @return mixed whatever selector::select() returns
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_fields_xpath($html, $selector, $fieldname)
+ {
+     $extracted = selector::select($html, $selector);
+
+     // Selector problems are logged; the result is returned regardless.
+     if (selector::$error)
+     {
+         log::error("Field(\"{$fieldname}\") ".selector::$error."\n");
+     }
+
+     return $extracted;
+ }
+
+ /**
+  * Extract a field from $html using selector::select() in 'regex' mode.
+  *
+  * @param mixed $html
+  * @param mixed $selector
+  * @param mixed $fieldname field name, used only in the error log line
+  * @return mixed whatever selector::select() returned
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_fields_regex($html, $selector, $fieldname)
+ {
+     $extracted = selector::select($html, $selector, 'regex');
+     if ( ! selector::$error)
+     {
+         return $extracted;
+     }
+     // The selector reported a problem: log it, but still hand back the result.
+     log::error("Field(\"{$fieldname}\") ".selector::$error."\n");
+     return $extracted;
+ }
+
+ /**
+  * Extract a field from $html using selector::select() in 'css' mode.
+  *
+  * @param mixed $html
+  * @param mixed $selector
+  * @param mixed $fieldname field name, used only in the error log line
+  * @return mixed whatever selector::select() returned
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public function get_fields_css($html, $selector, $fieldname)
+ {
+     $extracted = selector::select($html, $selector, 'css');
+     if ( ! selector::$error)
+     {
+         return $extracted;
+     }
+     // The selector reported a problem: log it, but still hand back the result.
+     log::error("Field(\"{$fieldname}\") ".selector::$error."\n");
+     return $extracted;
+ }
+
+ /**
+  * Clear the shell output.
+  *
+  * Emits the bytes 27 91 72 27 91 50 74, i.e. the ANSI sequences ESC[H and ESC[2J.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function clear_echo()
+ {
+     print "\033[H\033[2J";
+ }
+
+ /**
+  * Replace the previously printed shell output with $message.
+  *
+  * Remembers (in a static) how many terminal rows the last message occupied,
+  * erases that many rows, then prints the new message followed by a newline.
+  *
+  * @param mixed $message           text to print; may contain "\n"
+  * @param mixed $force_clear_lines when not NULL, overrides the remembered row count to erase
+  * @return void
+  * @author seatle
+  * @created time :2016-11-16 11:06
+  */
+ public function replace_echo($message, $force_clear_lines = NULL)
+ {
+     static $last_lines = 0;
+
+     if ( ! is_null($force_clear_lines))
+     {
+         $last_lines = $force_clear_lines;
+     }
+
+     // Ask the terminal for its width. Cast to int so that unexpected,
+     // non-numeric output falls through to the fallback below.
+     $toss = $status = null;
+     $term_width = (int) exec('tput cols', $toss, $status);
+     if ($status || $term_width < 1)
+     {
+         $term_width = 64; // Arbitrary fall-back term width.
+     }
+
+     $line_count = 0;
+     foreach (explode("\n", $message) as $line)
+     {
+         // Every line occupies at least one row, wrapped lines more. Computed
+         // arithmetically because count(str_split('', $w)) is 1 before PHP 8.2
+         // but 0 from 8.2 on, which under-counted blank lines.
+         $line_count += max(1, (int) ceil(strlen($line) / $term_width));
+     }
+
+     // Clear as many lines as the last output had: erase the current line,
+     // move the cursor up one line, erase that line too.
+     for ($i = 0; $i < $last_lines; $i++)
+     {
+         echo "\r\033[K\033[1A\r\033[K\r";
+     }
+
+     $last_lines = $line_count;
+
+     echo $message."\n";
+ }
+
+ /**
+  * Render the start-up / status screen. Windows never reaches this method
+  * (per the original note).
+  *
+  * Builds the header, the task table, the server table (multi-server mode only)
+  * and the collect-status table, then prints them through replace_echo().
+  *
+  * @return void
+  */
+ public function display_ui()
+ {
+     // sys_getloadavg() returns false on failure; fall back to an empty list
+     // so the foreach and implode() below keep working.
+     $loadavg = sys_getloadavg();
+     if ( ! is_array($loadavg))
+     {
+         $loadavg = array();
+     }
+     foreach ($loadavg as $k=>$v)
+     {
+         $loadavg[$k] = round($v, 2);
+     }
+     $display_str = "\033[1A\n\033[K-----------------------------\033[47;30m PHPSPIDER \033[0m-----------------------------\n\033[0m";
+     $run_time_str = util::time2second(time() - self::$time_start, false);
+     $display_str .= 'PHPSpider version:'.self::VERSION.' PHP version:'.PHP_VERSION."\n";
+     $display_str .= 'start time:'.date('Y-m-d H:i:s', self::$time_start).' run '.$run_time_str." \n";
+
+     $display_str .= 'spider name: '.self::$configs['name']."\n";
+     if (self::$multiserver)
+     {
+         $display_str .= 'server id: '.self::$serverid."\n";
+     }
+     $display_str .= 'task number: '.self::$tasknum."\n";
+     $display_str .= 'load average: '.implode(', ', $loadavg)."\n";
+     $display_str .= "document: https://doc.phpspider.org\n";
+
+     $display_str .= $this->display_task_ui();
+
+     if (self::$multiserver)
+     {
+         $display_str .= $this->display_server_ui();
+     }
+
+     $display_str .= $this->display_collect_ui();
+
+     $display_str .= "---------------------------------------------------------------------\n";
+     $display_str .= 'Press Ctrl-C to quit. Start success.'.date('Y-m-d H:i:s').' - '.round(memory_get_usage() / 1024 / 1024, 2).'MB'."\n";
+     if (self::$terminate)
+     {
+         $display_str .= "\n\033[33mWait for the process exits...\033[0m";
+     }
+     // Overwrite the previous screen instead of scrolling.
+     $this->replace_echo($display_str);
+ }
+
+ /**
+  * Build the TASKS table: one header row plus one row per task status entry
+  * returned by get_task_status_list() for this server.
+  *
+  * @return string the rendered table
+  */
+ public function display_task_ui()
+ {
+     $out = "-------------------------------\033[47;30m TASKS \033[0m-------------------------------\n";
+
+     // Column label => total column width.
+     $columns = array(
+         array('taskid', self::$taskid_length + 2),
+         array('taskpid', self::$pid_length + 2),
+         array('mem', self::$mem_length + 2),
+         array('collect succ', self::$urls_length),
+         array('collect fail', self::$urls_length),
+         array('speed', self::$speed_length + 2),
+     );
+     foreach ($columns as $column)
+     {
+         list($label, $width) = $column;
+         // Highlighted label followed by plain spaces up to the column width.
+         $out .= "\033[47;30m".$label."\033[0m".str_pad('', $width - strlen($label));
+     }
+     $out .= "\n";
+
+     foreach ($this->get_task_status_list(self::$serverid, self::$tasknum) as $json)
+     {
+         $task = json_decode($json, true);
+         if (empty($task))
+         {
+             // Skip entries that are missing or not valid JSON.
+             continue;
+         }
+         $row  = str_pad($task['id'], self::$taskid_length + 2);
+         $row .= str_pad($task['pid'], self::$pid_length + 2);
+         $row .= str_pad($task['mem'].'MB', self::$mem_length + 2);
+         $row .= str_pad($task['collect_succ'], self::$urls_length);
+         $row .= str_pad($task['collect_fail'], self::$urls_length);
+         $row .= str_pad($task['speed'].'/s', self::$speed_length + 2);
+         $out .= $row."\n";
+     }
+     return $out;
+ }
+
+ /**
+  * Build the SERVER table: one header row plus one row per server found in the
+  * 'server_list' queue entry, with the task figures of each server summed up.
+  *
+  * @return string the rendered table
+  */
+ public function display_server_ui()
+ {
+     $display_str = "-------------------------------\033[47;30m SERVER \033[0m------------------------------\n";
+
+     $display_str .= "\033[47;30mserver\033[0m". str_pad('', self::$server_length+2-strlen('serverid')).
+         "\033[47;30mtasknum\033[0m". str_pad('', self::$tasknum_length+2-strlen('tasknum')).
+         "\033[47;30mmem\033[0m". str_pad('', self::$mem_length+2-strlen('mem')).
+         "\033[47;30mcollect succ\033[0m". str_pad('', self::$urls_length-strlen('collect succ')).
+         "\033[47;30mcollect fail\033[0m". str_pad('', self::$urls_length-strlen('collect fail')).
+         "\033[47;30mspeed\033[0m". str_pad('', self::$speed_length+2-strlen('speed')).
+         "\n";
+
+     $server_list_json = queue::get('server_list');
+     $server_list = json_decode($server_list_json, true);
+     // A missing or malformed 'server_list' entry decodes to a non-array;
+     // render just the header in that case instead of iterating over it.
+     if ( ! is_array($server_list))
+     {
+         $server_list = array();
+     }
+     foreach ($server_list as $server)
+     {
+         $serverid = $server['serverid'];
+         $tasknum = $server['tasknum'];
+         $mem = 0;
+         $speed = 0;
+         $collect_succ = $collect_fail = 0;
+         $task_status = $this->get_task_status_list($serverid, $tasknum);
+         foreach ($task_status as $json)
+         {
+             $task = json_decode($json, true);
+             if (empty($task))
+             {
+                 continue;
+             }
+             // Accumulate per-task figures into per-server totals.
+             $mem += $task['mem'];
+             $speed += $task['speed'];
+             $collect_fail += $task['collect_fail'];
+             $collect_succ += $task['collect_succ'];
+         }
+
+         $display_str .= str_pad($serverid, self::$server_length).
+             str_pad($tasknum, self::$tasknum_length + 2).
+             str_pad($mem.'MB', self::$mem_length + 2).
+             str_pad($collect_succ, self::$urls_length).
+             str_pad($collect_fail, self::$urls_length).
+             str_pad($speed.'/s', self::$speed_length + 2).
+             "\n";
+     }
+     return $display_str;
+ }
+
+ /**
+  * Build the COLLECT STATUS table: a header row and a single row holding the
+  * found / queued / collected / fields / depth figures.
+  *
+  * @return string the rendered table
+  */
+ public function display_collect_ui()
+ {
+     $out = "---------------------------\033[47;30m COLLECT STATUS \033[0m--------------------------\n";
+
+     // Column label => fixed column width.
+     $widths = array(
+         'find pages' => 16,
+         'queue'      => 14,
+         'collected'  => 15,
+         'fields'     => 15,
+         'depth'      => 12,
+     );
+     foreach ($widths as $label => $width)
+     {
+         $out .= "\033[47;30m".$label."\033[0m".str_pad('', $width - strlen($label));
+     }
+     $out .= "\n";
+
+     // Gather the figures first (same call order as before), then lay them out.
+     $collect   = $this->get_collect_url_num();
+     $collected = $this->get_collected_url_num();
+     $queue     = $this->queue_lsize();
+     $fields    = $this->get_fields_num();
+     $depth     = $this->get_depth_num();
+
+     $out .= str_pad($collect, $widths['find pages'])
+         .str_pad($queue, $widths['queue'])
+         .str_pad($collected, $widths['collected'])
+         .str_pad($fields, $widths['fields'])
+         .str_pad($depth, $widths['depth'])
+         ."\n";
+     return $out;
+ }
+
+ /**
+ * 判断是否附件文件
+ *
+ * @return void
+ * @author seatle
+ * @created time :2016-09-23 17:13
+ */
+ //public function is_attachment_file($url)
+ //{
+ //$mime_types = $GLOBALS['config']['mimetype'];
+ //$mime_types_flip = array_flip($mime_types);
+
+ //$pathinfo = pathinfo($url);
+ //$fileext = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';
+
+ //$fileinfo = array();
+ //// 存在文件后缀并且是配置里面的后缀
+ //if (!empty($fileext) && isset($mime_types_flip[$fileext]))
+ //{
+ //stream_context_set_default(
+ //array(
+ //'http' => array(
+ //'method' => 'HEAD'
+ //)
+ //)
+ //);
+ //// 代理和Cookie以后实现, 方法和 file_get_contents 一样 使用 stream_context_create 设置
+ //$headers = get_headers($url, 1);
+ //if (strpos($headers[0], '302'))
+ //{
+ //$url = $headers['Location'];
+ //$headers = get_headers($url, 1);
+ //}
+ ////print_r($headers);
+ //$fileinfo = array(
+ //'basename' => isset($pathinfo['basename']) ? $pathinfo['basename'] : '',
+ //'filename' => isset($pathinfo['filename']) ? $pathinfo['filename'] : '',
+ //'fileext' => isset($pathinfo['extension']) ? $pathinfo['extension'] : '',
+ //'filesize' => isset($headers['Content-Length']) ? $headers['Content-Length'] : 0,
+ //'atime' => isset($headers['Date']) ? strtotime($headers['Date']) : time(),
+ //'mtime' => isset($headers['Last-Modified']) ? strtotime($headers['Last-Modified']) : time(),
+ //);
+
+ //$mime_type = 'html';
+ //$content_type = isset($headers['Content-Type']) ? $headers['Content-Type'] : '';
+ //if (!empty($content_type))
+ //{
+ //$mime_type = isset($GLOBALS['config']['mimetype'][$content_type]) ? $GLOBALS['config']['mimetype'][$content_type] : $mime_type;
+ //}
+ //$mime_types_flip = array_flip($mime_types);
+ //// 判断一下是不是文件名被加什么后缀了, 比如 http://www.xxxx.com/test.jpg?token=xxxxx
+ //if (!isset($mime_types_flip[$fileinfo['fileext']]))
+ //{
+ //$fileinfo['fileext'] = $mime_type;
+ //$fileinfo['basename'] = $fileinfo['filename'].'.'.$mime_type;
+ //}
+ //}
+ //return $fileinfo;
+ //}
+
+ // Returns whether the current process is the master (main) task process, as recorded in self::$taskmaster.
+ public function is_taskmaster()
+ {
+ return self::$taskmaster;
+ }
+
+ // Returns the task id of the current process, as recorded in self::$taskid.
+ public function get_task_id()
+ {
+ return self::$taskid;
+ }
+
+ /**
+  * Count the distinct hosts (sub-domains) seen so far under the root domain of $url.
+  *
+  * Only tracked when Redis is in use: the counter is stored under the root domain
+  * and each host is remembered under a "sub_d-<host>" marker key. Without Redis,
+  * or when the URL has no usable root domain / host, 0 is returned.
+  *
+  * @param string $url
+  * @return mixed the counter as returned by the queue, or 0
+  */
+ public function sub_domain_count($url)
+ {
+     if (empty($url))
+     {
+         return 0;
+     }
+     $root = $this->getRootDomain($url, 'root');
+     if (empty($root))
+     {
+         return 0;
+     }
+     $host = $this->getRootDomain($url, 'host');
+     if (empty($host) || ! self::$use_redis)
+     {
+         return 0;
+     }
+
+     $seen = queue::get($root);
+     // Once the configured maximum is exceeded, stop registering new hosts.
+     if ( ! empty(self::$configs['max_sub_num']) && $seen > self::$configs['max_sub_num'])
+     {
+         return $seen;
+     }
+
+     // Hosts longer than 32 characters are hashed for the marker key.
+     $hostkey = 'sub_d-'.(strlen($host) > 32 ? md5($host) : $host);
+     if (queue::exists($hostkey))
+     {
+         return $seen;
+     }
+
+     // First time this host is seen: bump the root-domain counter and mark the host.
+     $seen = queue::incr($root);
+     queue::set($hostkey, 1);
+     return $seen;
+ }
+
+ /**
+  * Extract the root domain (or another part) of a URL.
+  *
+  * The host is split on '.' and its trailing labels are matched against a
+  * built-in list of known suffixes to decide where the domain starts.
+  *
+  * @param string $url          URL or bare host; 'http://' is prepended when it does not start with "http"
+  * @param string $type         what to return: 'arr' (the whole result array), 'host', 'tld',
+  *                             'subdomain' (the 'name' element of the result); any other value,
+  *                             including the default 'root', returns the 'domain' element
+  * @param bool   $domain_check when false and $type is 'host', the parsed host is returned
+  *                             immediately without consulting the suffix list
+  * @return mixed the requested part; '' when no host can be parsed or the result does not
+  *               look like a domain; false when the host has more than two labels and its
+  *               last label is not a known suffix; an empty $url is returned unchanged
+  */
+ public function getRootDomain($url = '', $type = 'root', $domain_check = false)
+ {
+ if (empty($url))
+ {
+ return $url;
+ }
+ $url = trim($url);
+ if ( ! preg_match('/^http/i', $url))
+ {
+ $url = 'http://'.$url;
+ }
+ // Keep only the leading run of allowed characters (scheme, Han characters, letters, digits, '-', '.', '/')
+ $arr = array();
+ if (preg_match_all('/(^https?:\/\/[\p{Han}a-zA-Z0-9\-\.\/]+)/iu', $url, $arr))
+ {
+ $url = $arr['0']['0'];
+ unset($arr);
+ }
+ $url_parse = parse_url(strtolower($url));
+ if (empty($url_parse['host']))
+ {
+ return '';
+ }
+ // Fast path: the caller only wants the host and asked for no validation
+ if ($domain_check === false and $type == 'host')
+ {
+ return $url_parse['host'];
+ }
+
+ // Initialise the result array
+ $res = array(
+ 'scheme' => '',
+ 'host' => '',
+ 'path' => '',
+ 'name' => '',
+ 'domain' => '',
+ );
+
+ $urlarr = explode('.', $url_parse['host']);
+ $count = count($urlarr);
+ $res['scheme'] = $url_parse['scheme'];
+ $res['host'] = $url_parse['host'];
+ if ( ! empty($url_parse['path']))
+ {
+ $res['path'] = $url_parse['path'];
+ }
+ # Known domain suffixes
+ $state_domain = array('com', 'edu', 'gov', 'int', 'mil', 'net', 'org', 'biz', 'info', 'pro', 'name', 'coop', 'aero', 'xxx', 'idv', 'mobi', 'cc', 'me', 'jp', 'uk', 'ws', 'eu', 'pw', 'kr', 'io', 'us', 'cn', 'al', 'dz', 'af', 'ar', 'ae', 'aw', 'om', 'az', 'eg', 'et', 'ie', 'ee', 'ad', 'ao', 'ai', 'ag', 'at', 'au', 'mo', 'bb', 'pg', 'bs', 'pk', 'py', 'ps', 'bh', 'pa', 'br', 'by', 'bm', 'bg', 'mp', 'bj', 'be', 'is', 'pr', 'ba', 'pl', 'bo', 'bz', 'bw', 'bt', 'bf', 'bi', 'bv', 'kp', 'gq', 'dk', 'de', 'tl', 'tp', 'tg', 'dm', 'do', 'ru', 'ec', 'er', 'fr', 'fo', 'pf', 'gf', 'tf', 'va', 'ph', 'fj', 'fi', 'cv', 'fk', 'gm', 'cg', 'cd', 'co', 'cr', 'gg', 'gd', 'gl', 'ge', 'cu', 'gp', 'gu', 'gy', 'kz', 'ht', 'nl', 'an', 'hm', 'hn', 'ki', 'dj', 'kg', 'gn', 'gw', 'ca', 'gh', 'ga', 'kh', 'cz', 'zw', 'cm', 'qa', 'ky', 'km', 'ci', 'kw', 'hr', 'ke', 'ck', 'lv', 'ls', 'la', 'lb', 'lt', 'lr', 'ly', 'li', 're', 'lu', 'rw', 'ro', 'mg', 'im', 'mv', 'mt', 'mw', 'my', 'ml', 'mk', 'mh', 'mq', 'yt', 'mu', 'mr', 'um', 'as', 'vi', 'mn', 'ms', 'bd', 'pe', 'fm', 'mm', 'md', 'ma', 'mc', 'mz', 'mx', 'nr', 'np', 'ni', 'ne', 'ng', 'nu', 'no', 'nf', 'na', 'za', 'aq', 'gs', 'pn', 'pt', 'se', 'ch', 'sv', 'yu', 'sl', 'sn', 'cy', 'sc', 'sa', 'cx', 'st', 'sh', 'kn', 'lc', 'sm', 'pm', 'vc', 'lk', 'sk', 'si', 'sj', 'sz', 'sd', 'sr', 'sb', 'so', 'tj', 'tw', 'th', 'tz', 'to', 'tc', 'tt', 'tn', 'tv', 'tr', 'tm', 'tk', 'wf', 'vu', 'gt', 've', 'bn', 'ug', 'ua', 'uy', 'uz', 'es', 'eh', 'gr', 'hk', 'sg', 'nc', 'nz', 'hu', 'sy', 'jm', 'am', 'ac', 'ye', 'iq', 'ir', 'il', 'it', 'in', 'id', 'vg', 'jo', 'vn', 'zm', 'je', 'td', 'gi', 'cl', 'cf', 'yr', 'arpa', 'museum', 'asia', 'ax', 'bl', 'bq', 'cat', 'cw', 'gb', 'jobs', 'mf', 'rs', 'su', 'sx', 'tel', 'travel', 'shop', 'ltd', 'store', 'vip', '网店', '中国', '公司', '网络', 'co.il', 'co.nz', 'co.uk', 'me.uk', 'org.uk', 'com.sb', '在线', '中文网', '移动', 'wang', 'club', 'ren', 'top', 'website', 'cool', 'company', 'city', 'email', 'market', 'software', 'ninja', '我爱你', 'bike', 'today', 
'life', 'space', 'pub', 'site', 'help', 'link', 'photo', 'video', 'click', 'pics', 'sexy', 'audio', 'gift', 'tech', '网址', 'online', 'win', 'download', 'party', 'bid', 'loan', 'date', 'trade', 'red', 'blue', 'pink', 'poker', 'green', 'farm', 'zone', 'guru', 'tips', 'land', 'care', 'camp', 'cab', 'cash', 'limo', 'toys', 'tax', 'town', 'fish', 'fund', 'fail', 'house', 'shoes', 'media', 'guide', 'tools', 'solar', 'watch', 'cheap', 'rocks', 'news', 'live', 'lawyer', 'host', 'wiki', 'ink', 'design', 'lol', 'hiphop', 'hosting', 'diet', 'flowers', 'car', 'cars', 'auto', 'mom', 'cq', 'he', 'nm', 'ln', 'jl', 'hl', 'js', 'zj', 'ah', 'jx', 'ha', 'hb', 'gx', 'hi', 'gz', 'yn', 'xz', 'qh', 'nx', 'xj', 'xyz', 'xin', 'science', 'press', 'band', 'engineer', 'social', 'studio', 'work', 'game', 'kim', 'games', 'group', '集团');
+ if ($count <= 2)
+ {
+ # The host is already in bare "name.suffix" form (no sub-domain part): use it directly
+ $last = array_pop($urlarr);
+ $last_1 = array_pop($urlarr);
+ if (in_array($last, $state_domain))
+ {
+ $res['domain'] = $last_1.'.'.$last;
+ $res['name'] = $last_1;
+ $res['tld'] = $last;
+ }
+ }
+ elseif ($count > 2)
+ {
+ $last = array_pop($urlarr);
+ $last_1 = array_pop($urlarr);
+ $last_2 = array_pop($urlarr);
+ $res['domain'] = $last_1.'.'.$last; // default: "n.com" form
+ $res['name'] = $last_2;
+
+ // Reject hosts whose last label is not a known suffix
+ // NOTE(review): this path returns false while the other failure paths return '' - confirm callers only test with empty()
+ if ( ! in_array($last, $state_domain))
+ {
+ return false;
+ }
+
+ if (in_array($last, $state_domain))
+ {
+ $res['domain'] = $last_1.'.'.$last; // "n.com" form
+ $res['name'] = $last_1;
+ $res['tld'] = $last;
+ }
+ // Second-level suffix: the label before the suffix is itself a known suffix and the last label is not one of com/net/org/edu/gov
+ if ($last_1 !== $last and in_array($last_1, $state_domain) and ! in_array($last, array('com', 'net', 'org', 'edu', 'gov')))
+ {
+ $res['domain'] = $last_2.'.'.$last_1.'.'.$last; // "n.n.com" form
+ $res['name'] = $last_2;
+ $res['tld'] = $last_1.'.'.$last;
+ }
+ // For .cn, only 'com', 'net', 'org', 'edu', 'gov' (or labels of at most two characters) count as second-level suffixes
+ if (in_array($last, array('cn')) and $last_1 !== $last and strlen($last_1) > 2 and ! in_array($last_1, array('com', 'net', 'org', 'edu', 'gov')))
+ {
+ $res['domain'] = $last_1.'.'.$last; // "n.cn" form
+ $res['name'] = $last_1;
+ $res['tld'] = $last;
+ }
+ }
+
+ // Check that the computed domain actually looks like a domain name before returning anything
+ if ( ! empty($res['domain']) and preg_match('/^([\p{Han}a-zA-Z0-9])+([\p{Han}a-zA-Z0-9\-])*\.[a-zA-Z\.\p{Han}]+$/iu', $res['domain']))
+ {
+ if ($type == 'arr')
+ {
+ return $res;
+ }
+ elseif ($type == 'host')
+ {
+ return $res['host'];
+ }
+ elseif ($type == 'tld')
+ {
+ return $res['tld'];
+ }
+ elseif ($type == 'subdomain')
+ {
+ return $res['name'];
+ }
+ else
+ {
+ return $res['domain'];
+ }
+ }
+ else
+ {
+ return '';
+ }
+ }
+
+}
diff --git a/vendor/owner888/phpspider/core/queue.php b/vendor/owner888/phpspider/core/queue.php
new file mode 100644
index 0000000..3ea4149
--- /dev/null
+++ b/vendor/owner888/phpspider/core/queue.php
@@ -0,0 +1,1388 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider Redis operation class file
+//----------------------------------
+
+namespace phpspider\core;
+
+use Exception;
+use Redis;
+
+class queue
+{
+ /**
+ * Redis link handle
+ * NOTE(review): not referenced by the methods visible in this part of the file; connections are kept in self::$links - confirm whether it is still needed
+ */
+ protected static $redis = NULL;
+
+ /**
+ * Redis configuration arrays, keyed by link name
+ */
+ protected static $configs = array();
+ // Open Redis connections, keyed by link name
+ private static $links = array();
+ // Name of the link the static methods currently operate on
+ private static $link_name = 'default';
+
+ /**
+ * Default Redis key prefix, used when the link's config has no 'prefix'
+ */
+ public static $prefix = 'phpspider';
+
+ // Message describing the last init() failure
+ public static $error = '';
+
+ /**
+  * Return the Redis connection for the current link name, opening it on first use.
+  *
+  * On failure self::$error is set, the link slot is removed and false is returned.
+  *
+  * @return Redis|false
+  */
+ public static function init()
+ {
+     if ( ! extension_loaded('redis'))
+     {
+         self::$error = 'The redis extension was not found';
+         return false;
+     }
+
+     $name = self::$link_name;
+
+     // Resolve the configuration for this link.
+     $config = $name == 'default' ? self::_get_default_config() : self::$configs[$name];
+
+     // Reuse the connection when one is already registered for this link.
+     if ( ! empty(self::$links[$name]))
+     {
+         return self::$links[$name];
+     }
+
+     // Register the handle before connecting, exactly as the link slot was filled before.
+     self::$links[$name] = $link = new Redis();
+
+     // A host containing ".sock" is connected to as a unix socket (no port/timeout).
+     $connected = strstr($config['host'], '.sock')
+         ? $link->connect($config['host'])
+         : $link->connect($config['host'], $config['port'], $config['timeout']);
+     if ( ! $connected)
+     {
+         self::$error = 'Unable to connect to redis server';
+         unset(self::$links[$name]);
+         return false;
+     }
+
+     // Authenticate when a password is configured.
+     if ($config['pass'] && ! $link->auth($config['pass']))
+     {
+         self::$error = 'Redis Server authentication failed';
+         unset(self::$links[$name]);
+         return false;
+     }
+
+     $prefix = empty($config['prefix']) ? self::$prefix : $config['prefix'];
+     $link->setOption(Redis::OPT_PREFIX, $prefix.':');
+     // Never time out on reads; ini_set('default_socket_timeout', -1) has no
+     // effect here, so the option is set on the connection instead.
+     $link->setOption(Redis::OPT_READ_TIMEOUT, -1);
+     $link->select($config['db']);
+
+     return self::$links[$name];
+ }
+
+ /**
+  * Close every registered Redis connection and forget it.
+  *
+  * @return void
+  */
+ public static function clear_link()
+ {
+     if ( ! self::$links)
+     {
+         return;
+     }
+     foreach (array_keys(self::$links) as $name)
+     {
+         self::$links[$name]->close();
+         unset(self::$links[$name]);
+     }
+ }
+
+ /**
+  * Select the link the static methods operate on, optionally registering its config.
+  *
+  * @param string $link_name link name to switch to
+  * @param array  $config    configuration to store for that link; when empty, a
+  *                          configuration must already be registered under $link_name
+  * @return void
+  * @throws Exception when no configuration is given and none is registered
+  */
+ public static function set_connect($link_name, $config = array())
+ {
+     self::$link_name = $link_name;
+
+     if ( ! empty($config))
+     {
+         self::$configs[$link_name] = $config;
+         return;
+     }
+
+     if (empty(self::$configs[$link_name]))
+     {
+         throw new Exception('You not set a config array for connect!');
+     }
+ }
+
+ /**
+  * Switch back to the 'default' link, using the default configuration.
+  *
+  * @return void
+  */
+ public static function set_connect_default()
+ {
+     self::set_connect('default', self::_get_default_config());
+ }
+
+ /**
+  * Return the configuration of the 'default' link, loading it from
+  * $GLOBALS['config']['redis'] on first use. Terminates the script when that
+  * global is not an array.
+  *
+  * @return array
+  */
+ protected static function _get_default_config()
+ {
+     if ( ! empty(self::$configs['default']))
+     {
+         return self::$configs['default'];
+     }
+
+     if ( ! is_array($GLOBALS['config']['redis']))
+     {
+         // No redis configuration available: nothing sensible can be done.
+         exit('cls_redis.php _get_default_config()' . '没有redis配置');
+     }
+
+     self::$configs['default'] = $GLOBALS['config']['redis'];
+     return self::$configs['default'];
+ }
+
+ /**
+  * Store a value, optionally with a time-to-live.
+  *
+  * @param mixed $key    key
+  * @param mixed $value  value
+  * @param int   $expire lifetime in seconds; when not greater than 0 the key is stored without expiry
+  * @return mixed result of the Redis call; NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function set($key, $value, $expire = 0)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $expire > 0
+                 ? $link->setex($key, $expire, $value)
+                 : $link->set($key, $value);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::set($key, $value, $expire);
+         }
+     }
+     return NULL;
+ }
+
+
+ /**
+  * Store a value only if the key does not exist yet, optionally with a time-to-live.
+  *
+  * @param mixed $key    key
+  * @param mixed $value  value
+  * @param int   $expire lifetime in seconds; when not greater than 0 a plain SETNX is issued
+  * @return mixed result of the Redis call; NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function setnx($key, $value, $expire = 0)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             // With an expiry, a single SET with the nx/ex options does both steps at once.
+             return $expire > 0
+                 ? $link->set($key, $value, array('nx', 'ex' => $expire))
+                 : $link->setnx($key, $value);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::setnx($key, $value, $expire);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Acquire a named lock, waiting until it becomes available.
+  *
+  * The lock is the key "Lock:<name>", set with the nx/ex options; the call keeps
+  * retrying every $interval microseconds until the set succeeds.
+  *
+  * @param mixed $name     lock name
+  * @param mixed $value    value stored in the lock key
+  * @param int   $expire   maximum lifetime of the lock in seconds (must be greater than 0);
+  *                        the key expires after that time
+  * @param int   $interval pause between attempts, in microseconds
+  * @return bool true once the lock is held; false when $name is null/empty or no link is available
+  * @author seatle
+  * @created time :2016-10-30 23:56
+  */
+ public static function lock($name, $value = 1, $expire = 5, $interval = 100000)
+ {
+     if ($name == null)
+     {
+         return false;
+     }
+
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             $key = "Lock:{$name}";
+             // SETNX has no expiry argument, so SET with nx + ex is used instead.
+             while ($link->set($key, $value, array('nx', 'ex' => $expire)) == false)
+             {
+                 usleep($interval);
+             }
+             return true;
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::lock($name, $value, $expire, $interval);
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Release a named lock by deleting its "Lock:<name>" key.
+  *
+  * @param mixed $name lock name
+  * @return mixed result of self::del()
+  */
+ public static function unlock($name)
+ {
+     return self::del("Lock:{$name}");
+ }
+
+ /**
+  * Read the value stored under $key.
+  *
+  * @param mixed $key
+  * @return mixed result of Redis::get(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function get($key)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->get($key);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::get($key);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Delete the entry stored under $key.
+  *
+  * @param mixed $key
+  * @return mixed result of Redis::del(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function del($key)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->del($key);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::del($key);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Return the type of the value stored under $key as a name.
+  *
+  * @param mixed $key
+  * @return string|null 'string', 'set' or 'list'; NULL for a missing key, any other
+  *                     type, a missing link or a failure that was not retried
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function type($key)
+ {
+     self::init();
+
+     // Map the phpredis type constants to names. The previous table keyed 'set'
+     // on 0, which is Redis::REDIS_NOT_FOUND; a set is Redis::REDIS_SET (2), so
+     // sets were never recognised and missing keys were reported as 'set'.
+     $types = array(
+         Redis::REDIS_STRING => 'string',
+         Redis::REDIS_SET    => 'set',
+         Redis::REDIS_LIST   => 'list',
+     );
+
+     try
+     {
+         if ( self::$links[self::$link_name] )
+         {
+             $type = self::$links[self::$link_name]->type($key);
+             if (isset($types[$type]))
+             {
+                 return $types[$type];
+             }
+         }
+     }
+     catch (Exception $e)
+     {
+         $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n";
+         log::warn($msg);
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::type($key);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Increase the integer stored under $key.
+  *
+  * @param mixed $key
+  * @param int   $integer amount to add; when empty (0) the value is increased by 1
+  * @return mixed result of Redis::incr()/incrby(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function incr($key, $integer = 0)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return empty($integer) ? $link->incr($key) : $link->incrby($key, $integer);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::incr($key, $integer);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Decrease the integer stored under $key.
+  *
+  * @param mixed $key
+  * @param int   $integer amount to subtract; when empty (0) the value is decreased by 1
+  * @return mixed result of Redis::decr()/decrby(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function decr($key, $integer = 0)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return empty($integer) ? $link->decr($key) : $link->decrby($key, $integer);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::decr($key, $integer);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Append $value to the string stored under $key.
+  *
+  * @param mixed $key
+  * @param mixed $value
+  * @return mixed result of Redis::append(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function append($key, $value)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->append($key, $value);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::append($key, $value);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Return a substring of the string stored under $key.
+  *
+  * @param mixed $key
+  * @param mixed $start
+  * @param mixed $end
+  * @return mixed result of Redis::substr(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function substr($key, $start, $end)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->substr($key, $start, $end);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::substr($key, $start, $end);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Switch the current link to the database with the given index.
+  *
+  * @param mixed $index database index
+  * @return mixed result of Redis::select(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function select($index)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->select($index);
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::select($index);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Return the number of keys in the currently selected database.
+  *
+  * @return mixed result of Redis::dbsize(); 0 when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function dbsize()
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->dbsize();
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::dbsize();
+         }
+     }
+     return 0;
+ }
+
+ /**
+  * Delete every key in the currently selected database.
+  *
+  * @return mixed result of Redis::flushdb(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function flushdb()
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->flushdb();
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::flushdb();
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Delete every key in all databases.
+  *
+  * @return mixed result of Redis::flushall(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function flushall()
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->flushall();
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::flushall();
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Save the data set to disk.
+  *
+  * @param mixed $is_bgsave when truthy the save is done in the background (BGSAVE),
+  *                         otherwise synchronously (SAVE)
+  * @return mixed result of Redis::save()/bgsave(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function save($is_bgsave = false)
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $is_bgsave ? $link->bgsave() : $link->save();
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::save($is_bgsave);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * Return the server's information and statistics.
+  *
+  * @return mixed result of Redis::info(); NULL when no link is available or the failure was not retried
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function info()
+ {
+     self::init();
+     try
+     {
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             return $link->info();
+         }
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Drop the link, pause 100 ms and retry; init() reconnects because the slot is empty.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::info();
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * slowlog: query the slow log.
+  *
+  * When an exception with code 0 is caught, the link is closed and the call
+  * is retried after 100 ms.
+  * @param string $command sub-command handed to the link's slowlog() (default 'get')
+  * @param int $len optional length argument; left out of the call when empty
+  * @return mixed reply of the link's slowlog() call, or NULL
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function slowlog($command = 'get', $len = 0)
+ {
+     self::init();
+     try
+     {
+         // Go through the shared link like every other command in this class.
+         $link = self::$links[self::$link_name];
+         if ($link)
+         {
+             // The length is only forwarded when it is non-empty.
+             return empty($len) ? $link->slowlog($command) : $link->slowlog($command, $len);
+         }
+     }
+     catch (Exception $e)
+     {
+         $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n";
+         log::warn($msg);
+         if ($e->getCode() == 0)
+         {
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             return self::slowlog($command, $len);
+         }
+     }
+     return NULL;
+ }
+
+ /**
+  * lastsave: Unix timestamp of the last successful save to disk.
+  *
+  * @return mixed reply of the link's lastsave() call, or NULL
+  * @author seatle
+  * @created time :2015-12-18 11:28
+  */
+ public static function lastsave()
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->lastsave() : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::lastsave();
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * lpush: push a value onto the left end of a list.
+  *
+  * @param mixed $key list key
+  * @param mixed $value value to push
+  * @return mixed reply of the link's lpush() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function lpush($key, $value)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->lpush($key, $value) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::lpush($key, $value);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * rpush: push a value onto the right end of a list.
+  *
+  * @param mixed $key list key
+  * @param mixed $value value to push
+  * @return mixed reply of the link's rpush() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function rpush($key, $value)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->rpush($key, $value) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::rpush($key, $value);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * lpop: pop a value from the left end of a list, removing it.
+  *
+  * @param mixed $key list key
+  * @return mixed reply of the link's lpop() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function lpop($key)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->lpop($key) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::lpop($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * rpop: pop a value from the right end of a list, removing it.
+  *
+  * @param mixed $key list key
+  * @return mixed reply of the link's rpop() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function rpop($key)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->rpop($key) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::rpop($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * lsize: length of a list (same as llen).
+  *
+  * @param mixed $key list key
+  * @return mixed reply of the link's lLen() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function lsize($key)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         // lLen() is the canonical phpredis name; lSize() is only a deprecated alias of it.
+         $reply = $link ? $link->lLen($key) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::lsize($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * lget: read the list element at an index without removing it.
+  *
+  * @param mixed $key list key
+  * @param int $index position in the list (default 0)
+  * @return mixed reply of the link's lIndex() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function lget($key, $index = 0)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         // lIndex() is the canonical phpredis name; lGet() is only a deprecated alias of it.
+         $reply = $link ? $link->lIndex($key, $index) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::lget($key, $index);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * lrange: read a range of list elements.
+  *
+  * @param mixed $key list key
+  * @param mixed $start first index of the range
+  * @param mixed $end last index of the range
+  * @return mixed reply of the link's lRange() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function lrange($key, $start, $end)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->lRange($key, $start, $end) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::lrange($key, $start, $end);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * rlist: pop up to $length items from the right end of a list, removing them.
+  *
+  * The list length is read first; the items are then popped one at a time
+  * with rpop(), so the operation consists of several separate calls.
+  *
+  * @param mixed $key list key
+  * @param mixed $length maximum number of items to pop
+  * @return array|null popped items, or NULL when the list length is not above 0
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function rlist($key, $length)
+ {
+     $available = self::lsize($key);
+     // Nothing queued (or no length could be read): report NULL, not an empty array.
+     if (!($available > 0))
+     {
+         return NULL;
+     }
+
+     // Pop at most $length items, capped at the list length measured above.
+     $limit = ($available >= $length) ? $length : $available;
+     $popped = array();
+     for ($n = 0; $n < $limit; $n++)
+     {
+         $item = self::rpop($key);
+         // A false pop is skipped but still counts against the limit.
+         if ($item !== false)
+         {
+             $popped[] = $item;
+         }
+     }
+     return $popped;
+ }
+
+ /**
+  * keys: find the keys matching a pattern.
+  *
+  * @param mixed $key pattern to match
+  * @return mixed reply of the link's keys() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  * Pattern examples:
+  * KEYS * matches every key in the database.
+  * KEYS h?llo matches hello, hallo, hxllo and so on.
+  * KEYS h*llo matches hllo, heeeeello and so on.
+  * KEYS h[ae]llo matches hello and hallo, but not hillo.
+  * Special characters are escaped with "\".
+  * Original note: this class sets an OPT_PREFIX prefix, so not all Redis keys are listed; remove the prefix if needed.
+  */
+ public static function keys($key)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->keys($key) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::keys($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * ttl: remaining time to live of a key.
+  * Positive number: seconds remaining.
+  * -1: the key never expires.
+  * -2: the key does not exist.
+  * @param mixed $key
+  * @return mixed reply of the link's ttl() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function ttl($key)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->ttl($key) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::ttl($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * expire: set the expiry time of a key (same as setTimeout).
+  *
+  * @param mixed $key
+  * @param mixed $expire expiry value handed to the link's expire()
+  * @return mixed reply of the link's expire() call, or NULL
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function expire($key, $expire)
+ {
+     self::init();
+     // Stays NULL when no link is available or the failure is not retried.
+     $reply = NULL;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->expire($key, $expire) : NULL;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::expire($key, $expire);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * exists: check whether a key exists.
+  *
+  * @param mixed $key
+  * @return mixed reply of the link's exists() call, or false
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function exists($key)
+ {
+     self::init();
+     // Stays false when no link is available or the failure is not retried.
+     $reply = false;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->exists($key) : false;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::exists($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+ * ping 检查当前redis是否存在且是否可以连接上
+ *
+ * @return void
+ * @author seatle
+ * @created time :2015-12-13 01:05
+ */
+ //protected static function ping()
+ //{
+ //if ( empty (self::$links[self::$link_name]) )
+ //{
+ //return false;
+ //}
+ //return self::$links[self::$link_name]->ping() == '+PONG';
+ //}
+
+ public static function encode($value)
+ {
+     return \json_encode($value, \JSON_UNESCAPED_UNICODE); // multibyte characters stay unescaped
+ }
+
+ public static function decode($value)
+ {
+     return \json_decode($value, true); // true: JSON objects are returned as associative arrays
+ }
+
+ /**
+ * 集合操作
+ */
+
+ /**
+  * sadd: add a value to a set.
+  *
+  * @param mixed $key set key
+  * @param mixed $value value to add
+  * @return mixed reply of the link's sadd() call, or null
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function sadd($key, $value)
+ {
+     self::init();
+     // Stays null when no link is available or the failure is not retried.
+     $reply = null;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->sadd($key, $value) : null;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::sadd($key, $value);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * spop: take a random member out of a set, removing it.
+  *
+  * @param mixed $key set key
+  * @return mixed reply of the link's spop() call, or null
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function spop($key)
+ {
+     self::init();
+     // Stays null when no link is available or the failure is not retried.
+     $reply = null;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->spop($key) : null;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::spop($key);
+         }
+     }
+     return $reply;
+ }
+
+ /**
+  * scard: number of members in a set (Redis SCARD).
+  *
+  * @param mixed $key set key
+  * @return mixed reply of the link's scard() call, or null
+  * @author seatle
+  * @created time :2015-12-13 01:05
+  */
+ public static function scard($key)
+ {
+     self::init();
+     // Stays null when no link is available or the failure is not retried.
+     $reply = null;
+     try
+     {
+         $link = self::$links[self::$link_name];
+         $reply = $link ? $link->scard($key) : null;
+     }
+     catch (Exception $e)
+     {
+         log::warn("PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n");
+         if ($e->getCode() == 0)
+         {
+             // Code 0: close and forget the link, wait 100 ms, then retry recursively.
+             self::$links[self::$link_name]->close();
+             self::$links[self::$link_name] = null;
+             usleep(100000);
+             $reply = self::scard($key);
+         }
+     }
+     return $reply;
+ }
+
+}
diff --git a/vendor/owner888/phpspider/core/requests.php b/vendor/owner888/phpspider/core/requests.php
new file mode 100644
index 0000000..5d15787
--- /dev/null
+++ b/vendor/owner888/phpspider/core/requests.php
@@ -0,0 +1,998 @@
+
+// +----------------------------------------------------------------------
+
+// +----------------------------------------------------------------------
+// | GET请求
+// | requests::get('http://www.test.com');
+// | SERVER
+// | $_GET
+// +----------------------------------------------------------------------
+// | POST请求
+// | $data = array('name'=>'request');
+// | requests::post('http://www.test.com', $data);
+// | SERVER
+// | $_POST
+// +----------------------------------------------------------------------
+// | POST RESTful请求
+// | $data = array('name'=>'request');
+// | $data_string = json_encode($data);
+// | requests::set_header("Content-Type", "application/json");
+// | requests::post('http://www.test.com', $data_string);
+// | SERVER
+// | file_get_contents('php://input')
+// +----------------------------------------------------------------------
+// | POST 文件上传
+ // | $data = array('file1'=>'./data/phpspider.log');
+// | requests::post('http://www.test.com', null, $data);
+// | SERVER
+// | $_FILES
+// +----------------------------------------------------------------------
+// | 代理
+// | requests::set_proxy(array('223.153.69.150:42354'));
+// | $html = requests::get('https://www.test.com');
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider请求类文件
+//----------------------------------
+
+namespace phpspider\core;
+
+ if (!function_exists('curl_file_create'))
+ {
+     // Fallback used when curl_file_create() is missing: builds the legacy "@path" upload string.
+     function curl_file_create($filename, $mimetype = '', $postname = '')
+     {
+         $type = $mimetype ? ";type=$mimetype" : '';
+         return "@$filename;filename=".($postname ?: basename($filename)).$type;
+     }
+ }
+
+class requests
+{
+ const VERSION = '2.0.1'; // appended to the default cURL user agent set in init()
+
+ protected static $ch = null; // cURL handle shared by all requests; created in init()
+
+ /**** Public variables ****/
+
+ /* user definable vars */
+
+ public static $timeout = 15; // seconds; init() reads an array as array(connect, total)
+ public static $encoding = null; // set to $output_encoding by split_header_body()
+ public static $input_encoding = null; // NOTE(review): not read in the visible code; presumably used by encoding() - confirm
+ public static $output_encoding = null; // NOTE(review): presumably the target charset used by encoding() - confirm
+ public static $cookies = array(); // global cookies sent with every request
+ // $cookies['username'] = "seatle";
+ public static $rawheaders = array(); // raw request headers to send, header name => value
+ public static $domain_cookies = array(); // cookies per domain: $domain_cookies[domain][name] = value
+ public static $hosts = array(); // host => list of IPs; request() binds the host to a random one
+ public static $headers = array(); // headers returned from server sent here
+ public static $useragents = array("requests/2.0.0"); // user agents to pick from at random; NOTE(review): default says 2.0.0 while VERSION is 2.0.1
+ public static $client_ips = array(); // client IPs to pick from at random for the CLIENT-IP / X-FORWARDED-FOR headers
+ public static $proxies = array(); // proxies to pick from at random
+ public static $raw = ""; // head + body content returned from server sent here
+ public static $head = ""; // head content
+ public static $content = ""; // The body before encoding
+ public static $text = ""; // The body after encoding
+ public static $info = array(); // curl info
+ public static $history = 302; // status code of the last "... Found" status line seen by get_history(); 0 when there is none
+ public static $status_code = 0; // http request status
+ public static $error = ""; // error messages sent here
+
+ /**
+  * Set the request timeout in seconds.
+  * When $timeout is an array, init() uses element 0 as the connect timeout and element 1 as the total timeout.
+  *
+  * @param int|array $timeout
+  * @return void
+  */
+ public static function set_timeout($timeout)
+ {
+ self::$timeout = $timeout;
+ }
+
+ /**
+  * Set the proxy or proxies to use.
+  * When several proxies are given, request() picks one at random for each request.
+  *
+  * @param mixed $proxy a single proxy string or an array of them, e.g.
+  * array (
+  *     'socks5://user1:pass2@host:port',
+  *     'socks5://user2:pass2@host:port'
+  * )
+  * @return void
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public static function set_proxy($proxy)
+ {
+     self::$proxies = !is_array($proxy) ? array($proxy) : $proxy;
+ }
+
+ /**
+  * Remove all proxies.
+  * Original note: every link carries proxy information and only some links need a proxy, so a way to remove it is required.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2018-07-16 17:59
+  */
+ public static function del_proxy()
+ {
+     self::$proxies = [];
+ }
+
+ /**
+  * Set a custom request header.
+  * The request headers can be read back from requests::$rawheaders,
+  * e.g. the Content-Type via requests::$rawheaders['Content-Type'].
+  * @param string $key header name
+  * @param string $value header value
+  * @return void
+  */
+ public static function set_header($key, $value)
+ {
+ self::$rawheaders[$key] = $value;
+ }
+
+ /**
+  * Set a single cookie, globally or for one domain.
+  *
+  * @param mixed $key cookie name
+  * @param mixed $value cookie value
+  * @param string $domain when non-empty the cookie is stored for that domain only
+  * @return bool false when $key is empty, true otherwise
+  */
+ public static function set_cookie($key, $value, $domain = '')
+ {
+     if (empty($key))
+     {
+         return false;
+     }
+     if (empty($domain))
+     {
+         self::$cookies[$key] = $value;
+         return true;
+     }
+     self::$domain_cookies[$domain][$key] = $value;
+     return true;
+ }
+
+ /**
+  * Set several cookies at once from a "name=value; name2=value2" string.
+  *
+  * @param mixed $cookies cookie string with ";"-separated name=value pairs
+  * @param string $domain when non-empty the cookies are stored for that domain, otherwise globally
+  * @return bool always true
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function set_cookies($cookies, $domain = '')
+ {
+     foreach (explode(';', $cookies) as $cookie)
+     {
+         // Split on the first "=" only; trim so "a=1; b=2" yields "b", not " b".
+         $pair = explode('=', $cookie, 2);
+         $key = trim($pair[0]);
+         if ($key === '')
+         {
+             continue; // empty segment, e.g. from a trailing ";" or an empty string
+         }
+         // isset() instead of empty(): a value of "0" must be kept.
+         $value = isset($pair[1]) ? trim($pair[1]) : '';
+         if (empty($domain))
+         {
+             self::$cookies[$key] = $value;
+         }
+         else
+         {
+             self::$domain_cookies[$domain][$key] = $value;
+         }
+     }
+     // explode() always returns at least one element, so this method has always reported true.
+     return true;
+ }
+
+ /**
+  * Get a single cookie value.
+  * @param mixed $name cookie name
+  * @param string $domain when empty, the global cookies (those set by hand via set_cookie) are used
+  * @return mixed the cookie value, or '' when the cookie or the domain is unknown
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function get_cookie($name, $domain = '')
+ {
+     // Start from the global cookies; a domain switches to its own set (empty when unknown).
+     $jar = self::$cookies;
+     if (!empty($domain))
+     {
+         $jar = isset(self::$domain_cookies[$domain]) ? self::$domain_cookies[$domain] : array();
+     }
+     return isset($jar[$name]) ? $jar[$name] : '';
+ }
+
+ /**
+  * Get the cookie array.
+  * @param string $domain when empty, the global cookies (those set by hand via set_cookie) are returned
+  * @return array cookies as name => value; empty for an unknown domain
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function get_cookies($domain = '')
+ {
+     if (empty($domain))
+     {
+         return self::$cookies;
+     }
+     // Unknown domains yield an empty array.
+     return isset(self::$domain_cookies[$domain]) ? self::$domain_cookies[$domain] : array();
+ }
+
+ /**
+  * Delete a single cookie.
+  *
+  * @param mixed $key cookie name
+  * @param string $domain when empty, the cookie is removed from the global cookies
+  * @return bool false when $key is empty or $domain has no stored cookies, true otherwise
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function del_cookie($key, $domain = '')
+ {
+     if (empty($key))
+     {
+         return false;
+     }
+
+     if (empty($domain))
+     {
+         if (isset(self::$cookies[$key]))
+         {
+             unset(self::$cookies[$key]);
+         }
+         return true;
+     }
+
+     // Nothing is stored for this domain.
+     if (!isset(self::$domain_cookies[$domain]))
+     {
+         return false;
+     }
+     if (isset(self::$domain_cookies[$domain][$key]))
+     {
+         unset(self::$domain_cookies[$domain][$key]);
+     }
+     return true;
+ }
+
+ /**
+  * Delete all cookies.
+  *
+  * With a non-empty $domain only that domain's cookies are removed,
+  * otherwise the global cookies are cleared.
+  *
+  * @param string $domain domain to clear; empty for the global cookies
+  * @return bool false when $domain is given but has no stored cookies, true otherwise
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function del_cookies($domain = '')
+ {
+     // No domain: reset the global cookies.
+     if (empty($domain))
+     {
+         self::$cookies = array();
+         return true;
+     }
+     // Unknown domain: nothing to delete.
+     if (!isset(self::$domain_cookies[$domain]))
+     {
+         return false;
+     }
+     unset(self::$domain_cookies[$domain]);
+     return true;
+ }
+
+ /**
+  * Set the user agent(s); request() picks one at random for each request.
+  *
+  * @param mixed $useragent a single user agent string or an array of them
+  * @return void
+  */
+ public static function set_useragent($useragent)
+ {
+     self::$useragents = !is_array($useragent) ? array($useragent) : $useragent;
+ }
+
+ /**
+  * Set the Referer request header.
+  * @param mixed $referer value stored in self::$rawheaders['Referer']
+  */
+ public static function set_referer($referer)
+ {
+ self::$rawheaders['Referer'] = $referer;
+ }
+
+ /**
+  * Set the forged client IP(s).
+  * request() picks one at random and sends it as CLIENT-IP and X-FORWARDED-FOR.
+  * @param mixed $ip a single IP string or an array of them
+  * @return void
+  */
+ public static function set_client_ip($ip)
+ {
+     self::$client_ips = !is_array($ip) ? array($ip) : $ip;
+ }
+
+ /**
+  * Remove all forged client IPs.
+  *
+  * @return void
+  * @author seatle
+  * @created time :2018-07-16 17:59
+  */
+ public static function del_client_ip()
+ {
+     self::$client_ips = [];
+ }
+
+ /**
+  * Set the Accept-Language request header (Chinese by default).
+  *
+  * @param string $lang language tag, 'zh-CN' when omitted
+  * @return void
+  */
+ public static function set_accept_language($lang = 'zh-CN')
+ {
+ self::$rawheaders['Accept-Language'] = $lang;
+ }
+
+ /**
+  * Bind a host name to one or more IPs; request() then connects to a random one of them.
+  * Original note: this spreads requests over different servers and works best when the target uses a CDN.
+  *
+  * @param string $host host name to bind
+  * @param mixed $ips a single IP or an array of IPs
+  * @return void
+  */
+ public static function set_hosts($host, $ips = array())
+ {
+     self::$hosts[$host] = is_array($ips) ? $ips : array($ips);
+ }
+
+ /**
+  * Split the raw response into header and body.
+  * The header is used to detect the encoding and to read the cookies;
+  * the body is kept both before and after the charset conversion.
+  *
+  * @return array array($head, $body), where $body is the converted text
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function split_header_body()
+ {
+ $head = $body = '';
+ $head = substr(self::$raw, 0, self::$info['header_size']);
+ $body = substr(self::$raw, self::$info['header_size']);
+ // http header
+ self::$head = $head;
+ // The body before encoding
+ self::$content = $body;
+
+ //$http_headers = array();
+ //// Parse the HTTP stream
+ //if (!empty(self::$raw))
+ //{
+ //self::get_response_cookies($domain);
+ //// The body may contain \r\n\r\n, but the first part is always the HTTP header; what remains after removing it is the body
+ //$array = explode("\r\n\r\n", self::$raw);
+ //foreach ($array as $k=>$v)
+ //{
+ //// A POST can produce two HTTP headers: HTTP/1.1 100 Continue and HTTP/1.1 200 OK
+ //if (preg_match("#^HTTP/.*? 100 Continue#", $v))
+ //{
+ //unset($array[$k]);
+ //continue;
+ //}
+ //if (preg_match("#^HTTP/.*? \d+ #", $v))
+ //{
+ //$header = $v;
+ //unset($array[$k]);
+ //$http_headers = self::get_response_headers($v);
+ //}
+ //}
+ //$body = implode("\r\n\r\n", $array);
+ //}
+
+ // Convert to the configured output encoding. Original note: xpath only supports utf-8; do not convert iso-8859-1, it is already utf-8
+ $body = self::encoding($body); // automatic charset conversion; NOTE(review): encoding() is not visible here
+ // After the conversion
+ self::$encoding = self::$output_encoding;
+
+ // The body after encoding
+ self::$text = $body;
+ return array($head, $body);
+ }
+
+ /**
+  * Parse the Set-Cookie headers and store the cookies for a domain.
+  *
+  * Every "name=value" segment found in the Set-Cookie lines of $header is
+  * written to self::$domain_cookies[$domain]. Segments without "=" and the
+  * attributes path, domain, expires and max-age are skipped.
+  *
+  * @param mixed $header raw response header text
+  * @param mixed $domain domain the cookies are stored under
+  * @return void
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function get_response_cookies($header, $domain)
+ {
+     // Capture the value part of every Set-Cookie header line.
+     preg_match_all("/.*?Set\-Cookie: ([^\r\n]*)/i", $header, $matches);
+     if (empty($matches[1]))
+     {
+         return;
+     }
+
+     // Cookie attributes that must not be stored as cookies themselves.
+     $attributes = array('path', 'domain', 'expires', 'max-age');
+
+     // Flatten all captured values into one list of ";"-separated segments.
+     foreach (explode(';', implode(';', $matches[1])) as $segment)
+     {
+         $parts = explode('=', $segment, 2);
+         // Segments without "=" (httponly, secure) or without a name are ignored.
+         if (count($parts) < 2 || empty($parts[0]))
+         {
+             continue;
+         }
+         $name = trim($parts[0]);
+         if (empty($name) || in_array(strtolower($name), $attributes))
+         {
+             continue;
+         }
+         self::$domain_cookies[$domain][$name] = trim($parts[1]);
+     }
+ }
+
+ /**
+  * Parse the response header text into a name => value array, stored in self::$headers.
+  * @param mixed $header raw response header text
+  * @return array the parsed headers; lines without ":" (such as the status line) are skipped
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function get_response_headers($header)
+ {
+     $headers = array();
+     foreach (explode("\n", $header) as $line)
+     {
+         // Split on the first ":" only; values such as dates contain ":" themselves.
+         $parts = explode(':', $line, 2);
+         if (count($parts) < 2)
+         {
+             continue; // status line or blank line
+         }
+         $key = trim($parts[0]);
+         $val = trim($parts[1]);
+         // Compare with '' instead of empty(): "0" is a valid value (e.g. Content-Length: 0).
+         if ($key === '' || $val === '')
+         {
+             continue;
+         }
+         $headers[$key] = $val;
+     }
+     self::$headers = $headers;
+     return self::$headers;
+ }
+
+ /**
+  * Detect the encoding of a string among a fixed list of candidates.
+  * @param $string
+  * @return string lower-cased encoding name
+  */
+ public static function get_encoding($string)
+ {
+     $candidates = array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1');
+     return strtolower(mb_detect_encoding($string, $candidates));
+ }
+
+ /**
+  * Remove the page's head section: deletes everything up to and including the last </head>.
+  * @param $html
+  * @return mixed
+  */
+ private static function _remove_head($html)
+ {
+ return preg_replace('/.+<\/head>/is', '', $html);
+ }
+
+ /**
+  * Rough check whether the argument looks like a URL.
+  *
+  * The pattern is not anchored: it succeeds when a "scheme:/" or "www."
+  * prefix followed by further non-space characters occurs anywhere in $url.
+  *
+  * @param string $url
+  * @return boolean
+  */
+ private static function _is_url($url)
+ {
+     // preg_match() yields 1 on a match and 0 or false otherwise; map that to a strict boolean.
+     $pattern = "/\b(([\w-]+:\/\/?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|\/)))/";
+     return preg_match($pattern, $url) ? true : false;
+ }
+
+ /**
+  * Create and configure the shared cURL handle.
+  *
+  * A new handle is created unless self::$ch is currently a resource; it gets
+  * the default user agent, the timeouts, a redirect limit and CURLOPT_NOSIGNAL.
+  *
+  * @return mixed the cURL handle stored in self::$ch
+  */
+ public static function init()
+ {
+     if (is_resource(self::$ch))
+     {
+         return self::$ch;
+     }
+
+     self::$ch = curl_init();
+     curl_setopt(self::$ch, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt(self::$ch, CURLOPT_HEADER, false);
+     curl_setopt(self::$ch, CURLOPT_USERAGENT, "phpspider-requests/".self::VERSION);
+     // An array timeout is array(connect, total); a scalar is the total, with half of it (rounded up) for connecting.
+     $split = is_array(self::$timeout);
+     curl_setopt(self::$ch, CURLOPT_CONNECTTIMEOUT, $split ? self::$timeout[0] : ceil(self::$timeout / 2));
+     curl_setopt(self::$ch, CURLOPT_TIMEOUT, $split ? self::$timeout[1] : self::$timeout);
+     // Maximum number of redirects allowed.
+     curl_setopt(self::$ch, CURLOPT_MAXREDIRS, 5);
+     // Original note: with timeouts in multi-threaded use this ignores the signal handlers, yet rare crashes still occur.
+     curl_setopt(self::$ch, CURLOPT_NOSIGNAL, true);
+     return self::$ch;
+ }
+
+ /**
+  * Send a GET request; request() appends non-empty $fields to the URL as a query string.
+  */
+ public static function get($url, $fields = array(), $allow_redirects = true, $cert = NULL)
+ {
+ self::init ();
+ return self::request($url, 'get', $fields, NULL, $allow_redirects, $cert);
+ }
+
+ /**
+  * Send a POST request.
+  * $fields comes in three forms: 1. array; 2. http query string; 3. json
+  * 1. array('name'=>'yangzetao')
+  * 2. http_build_query(array('name'=>'yangzetao'))
+  * 3. json_encode(array('name'=>'yangzetao'))
+  * The first two are ordinary posts that the server can read from $_POST.
+  * The third is a post stream (json rpc, in effect a web service):
+  * although it is a POST, the server can only read it as a stream via php://input or $HTTP_RAW_POST_DATA.
+  *
+  * @param mixed $url
+  * @param array $fields
+  * @param array $files files to upload, field name => file path
+  * @static
+  * @access public
+  * @return mixed whatever request() returns
+  */
+ public static function post($url, $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL)
+ {
+ self::init ();
+ return self::request($url, 'POST', $fields, $files, $allow_redirects, $cert);
+ }
+
+ public static function put($url, $fields = array(), $allow_redirects = true, $cert = NULL)
+ { // Send a PUT request; returns whatever request() returns.
+     self::init();
+     return self::request($url, 'PUT', $fields, NULL, $allow_redirects, $cert); // NULL fills request()'s $files slot
+ }
+
+ public static function delete($url, $fields = array(), $allow_redirects = true, $cert = NULL)
+ { // Send a DELETE request; returns whatever request() returns.
+     self::init();
+     return self::request($url, 'DELETE', $fields, NULL, $allow_redirects, $cert); // NULL fills request()'s $files slot
+ }
+
+ // HEAD request: asks for the meta-information carried in the HTTP response headers.
+ // Original note: it fetches the meta-information of the entity without transferring the entity body
+ // and is often used to test hyperlinks for validity, accessibility and recent changes.
+ public static function head($url, $fields = array(), $allow_redirects = true, $cert = NULL)
+ {
+     self::init();
+     return self::request($url, 'HEAD', $fields, NULL, $allow_redirects, $cert); // NULL fills the $files slot; the result is now returned
+ }
+
+ public static function options($url, $fields = array(), $allow_redirects = true, $cert = NULL)
+ { // Send an OPTIONS request; returns whatever request() returns.
+     self::init();
+     return self::request($url, 'OPTIONS', $fields, NULL, $allow_redirects, $cert); // NULL fills request()'s $files slot
+ }
+
+ public static function patch($url, $fields = array(), $allow_redirects = true, $cert = NULL)
+ { // Send a PATCH request; returns whatever request() returns.
+     self::init();
+     return self::request($url, 'PATCH', $fields, NULL, $allow_redirects, $cert); // NULL fills request()'s $files slot
+ }
+
+ /**
+  * Send an HTTP request over the shared cURL handle; returns false and sets self::$error when $url fails the URL checks.
+  * @param mixed $url request URL
+  * @param string $method request method
+  * @param array $fields form fields (appended to the URL as a query string for GET)
+  * @param array $files files to upload, field name => file path (only read for POST)
+  * @param bool $allow_redirects when truthy, CURLOPT_FOLLOWLOCATION is enabled
+  * @param mixed $cert CA certificate - NOTE(review): not used anywhere in this body
+  * @return mixed the body text produced by split_header_body(), or false
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function request($url, $method = 'GET', $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL)
+ {
+ $method = strtoupper($method);
+ if(!self::_is_url($url))
+ {
+ self::$error = "You have requested URL ({$url}) is not a valid HTTP address";
+ return false;
+ }
+
+ // For GET, append the fields to the URL as a query string
+ if ($method == 'GET' && !empty($fields))
+ {
+ $url = $url.(strpos($url, '?') === false ? '?' : '&').http_build_query($fields);
+ }
+
+ $parse_url = parse_url($url);
+ if (empty($parse_url) || empty($parse_url['host']) || !in_array($parse_url['scheme'], array('http', 'https')))
+ {
+ self::$error = "No connection adapters were found for '{$url}'";
+ return false;
+ }
+ $scheme = $parse_url['scheme'];
+ $domain = $parse_url['host'];
+
+ // Bind the host to a randomly chosen IP (load balancing)
+ if (self::$hosts)
+ {
+ if (isset(self::$hosts[$domain]))
+ {
+ $hosts = self::$hosts[$domain];
+ $key = rand(0, count($hosts)-1);
+ $ip = $hosts[$key];
+ $url = str_replace($domain, $ip, $url);
+ self::$rawheaders['Host'] = $domain; // NOTE(review): stays in $rawheaders for later requests to other hosts - confirm this is intended
+ }
+ }
+
+ curl_setopt( self::$ch, CURLOPT_URL, $url );
+
+ if ($method != 'GET')
+ {
+ // POST
+ if ($method == 'POST')
+ {
+ //curl_setopt( self::$ch, CURLOPT_POST, true );
+ $tmpheaders = array_change_key_case(self::$rawheaders, CASE_LOWER);
+ // Some RESTful services only accept data in JSON form
+ // Original note: CURLOPT_POST sets the uploaded content type to multipart/form-data
+ // and encodes the CURLOPT_POSTFIELDS content as multipart/form-data
+ // CURLOPT_CUSTOMREQUEST uploads the content as given
+ if ( isset($tmpheaders['content-type']) && $tmpheaders['content-type'] == 'application/json' )
+ {
+ curl_setopt( self::$ch, CURLOPT_CUSTOMREQUEST, $method );
+ }
+ else
+ {
+ curl_setopt( self::$ch, CURLOPT_POST, true );
+ }
+
+ $file_fields = array();
+ if (!empty($files))
+ {
+ foreach ($files as $postname => $file)
+ {
+ $filepath = realpath($file);
+ // Skip files that do not exist
+ if (!file_exists($filepath))
+ {
+ continue;
+ }
+
+ $filename = basename($filepath);
+ $type = self::get_mimetype($filepath);
+ $file_fields[$postname] = curl_file_create($filepath, $type, $filename);
+ // curl -F "name=seatle&file=@/absolute/path/to/image.png" http://localhost/uploadfile.php
+ //$cfile = '@'.realpath($filename).";type=".$type.";filename=".$filename;
+ }
+ }
+ }
+ else
+ {
+ self::$rawheaders['X-HTTP-Method-Override'] = $method;
+ curl_setopt( self::$ch, CURLOPT_CUSTOMREQUEST, $method ); // NOTE(review): $fields are not sent for non-POST methods in this body
+ }
+
+ if ( $method == 'POST' )
+ {
+ // Without file uploads, use http_build_query: better compatibility and a smaller request (original note)
+ if ( empty($file_fields) )
+ {
+ // plain POST
+ if ( is_array($fields) )
+ {
+ $fields = http_build_query($fields);
+ }
+ }
+ else
+ {
+ // There are POST fields as well
+ if ( is_array($fields) && !empty($fields) )
+ {
+ // May cause problems with some servers
+ $fields = array_merge($fields, $file_fields);
+ }
+ else
+ {
+ $fields = $file_fields;
+ }
+ }
+
+ // Original note: do not pass an array directly; for an unknown reason it is very slow
+ curl_setopt( self::$ch, CURLOPT_POSTFIELDS, $fields );
+ }
+ }
+
+ $cookies = self::get_cookies();
+ $domain_cookies = self::get_cookies($domain);
+ $cookies = array_merge($cookies, $domain_cookies);
+ // Send the cookies when any are set
+ if (!empty($cookies))
+ {
+ foreach ($cookies as $key=>$value)
+ {
+ $cookie_arr[] = $key.'='.$value;
+ }
+ $cookies = implode('; ', $cookie_arr);
+ curl_setopt(self::$ch, CURLOPT_COOKIE, $cookies);
+ }
+
+ if (!empty(self::$useragents))
+ {
+ $key = rand(0, count(self::$useragents) - 1);
+ self::$rawheaders['User-Agent'] = self::$useragents[$key];
+ }
+
+ if (!empty(self::$client_ips))
+ {
+ $key = rand(0, count(self::$client_ips) - 1);
+ self::$rawheaders['CLIENT-IP'] = self::$client_ips[$key];
+ self::$rawheaders['X-FORWARDED-FOR'] = self::$client_ips[$key];
+ }
+
+ if (self::$rawheaders)
+ {
+ $http_headers = array();
+ foreach (self::$rawheaders as $k=>$v)
+ {
+ $http_headers[] = $k.': '.$v;
+ }
+ curl_setopt( self::$ch, CURLOPT_HTTPHEADER, $http_headers );
+ }
+
+ curl_setopt( self::$ch, CURLOPT_ENCODING, 'gzip' );
+
+ // Disable certificate verification
+ if ($scheme == 'https')
+ {
+ curl_setopt(self::$ch, CURLOPT_SSL_VERIFYPEER, false);
+ curl_setopt(self::$ch, CURLOPT_SSL_VERIFYHOST, false);
+ }
+
+ if (self::$proxies)
+ {
+ $key = rand(0, count(self::$proxies) - 1);
+ $proxy = self::$proxies[$key];
+ curl_setopt( self::$ch, CURLOPT_PROXY, $proxy );
+ }
+
+ // header + body; the header carries the cookies
+ curl_setopt( self::$ch, CURLOPT_HEADER, true );
+ // Follow redirects and fetch the content behind them
+ if ($allow_redirects)
+ {
+ curl_setopt( self::$ch, CURLOPT_FOLLOWLOCATION, true);
+ }
+
+ self::$raw = curl_exec ( self::$ch );
+ // The effective URL
+ //$location = curl_getinfo( self::$ch, CURLINFO_EFFECTIVE_URL);
+ self::$info = curl_getinfo( self::$ch );
+ //print_r(self::$info);
+ self::$status_code = self::$info['http_code'];
+ if (self::$raw === false)
+ {
+ self::$error = 'Curl error: ' . curl_error( self::$ch );
+ //trigger_error(self::$error, E_USER_WARNING);
+ }
+
+ // Close the handle
+ curl_close( self::$ch );
+
+ // Original note: only store the URL after the request succeeded. NOTE(review): the lines below also run when curl_exec() returned false
+ list($header, $text) = self::split_header_body();
+ self::$history = self::get_history($header);
+ self::$headers = self::get_response_headers($header);
+ self::get_response_cookies($header, $domain);
+ //$data = substr($data, 10);
+ //$data = gzinflate($data);
+ return $text;
+ }
+
+ // Return the status code of the last "HTTP/... <code> Found" line in $header, or 0 when there is none.
+ public static function get_history($header)
+ {
+     $found_code = 0;
+     foreach (explode("\n", $header) as $raw_line)
+     {
+         if (!preg_match("#^HTTP/.*? (\d+) Found#", trim($raw_line), $match))
+         {
+             continue;
+         }
+         $found_code = empty($match[1]) ? 0 : intval($match[1]);
+     }
+     return $found_code;
+ }
+
+ // Detect the MIME type of $filepath via fileinfo and return it without the "; charset=..." suffix.
+ public static function get_mimetype($filepath)
+ {
+     $finfo = finfo_open(FILEINFO_MIME);
+     $description = finfo_file($finfo, $filepath);
+     finfo_close($finfo);
+     // FILEINFO_MIME yields "type/subtype; charset=x"; keep only the part before the first ';'.
+     $parts = explode(';', $description);
+     return empty($parts[0]) ? '' : $parts[0];
+ }
+
+ /**
+  * Build a raw multipart/form-data body from plain fields and in-memory files.
+  * The original author marked this helper as currently unused.
+  * Names, file names and values are inserted verbatim (no escaping is applied).
+  *
+  * @param mixed $post_fields field name => value
+  * @param mixed $file_fields input name => array('filename' => .., 'type' => .., 'content' => ..)
+  * @return array array($delimiter, $data): the boundary string and the encoded body
+  * @author seatle
+  * @created time :2017-08-03 18:06
+  */
+ public static function get_postfile_form($post_fields, $file_fields)
+ {
+     // anything that is not an array (e.g. null) simply contributes no parts
+     $post_fields = is_array($post_fields) ? $post_fields : array();
+     $file_fields = is_array($file_fields) ? $file_fields : array();
+     $data = '';
+     $delimiter = '-------------' . uniqid();
+
+     // Plain form fields. Parameters are written as name="..." without blanks around '=',
+     // the form shown in RFC 7578 and already used for filename="..." below.
+     foreach ($post_fields as $name => $content)
+     {
+         $data .= '--'.$delimiter."\r\n";
+         $data .= 'Content-Disposition: form-data; name="'.$name.'"';
+         $data .= "\r\n\r\n";
+         $data .= $content;
+         $data .= "\r\n";
+     }
+
+     // File parts additionally carry the file name and the content type.
+     foreach ($file_fields as $input_name => $file)
+     {
+         $data .= '--'.$delimiter."\r\n";
+         $data .= 'Content-Disposition: form-data; name="'.$input_name.'";'.
+             ' filename="'.$file['filename'].'"'."\r\n";
+         $data .= "Content-Type: {$file['type']}\r\n";
+         $data .= "\r\n";
+         $data .= $file['content'];
+         $data .= "\r\n";
+     }
+
+     // closing boundary
+     $data .= '--'.$delimiter."--\r\n";
+
+     // The caller sends $data with the header
+     // "Content-Type: multipart/form-data; boundary=<delimiter>" and the matching Content-Length.
+     return array($delimiter, $data);
+ }
+
+ /**
+  * Convert an HTML document to the output charset and rewrite its <meta ... charset=...> declaration.
+  *
+  * @param string $html  document to convert
+  * @param string $in    source charset; when null it is taken from the meta tag, then from mb_detect_encoding()
+  * @param string $out   target charset, default UTF-8; self::$output_encoding takes precedence when set
+  * @param string $mode  auto|iconv|mb_convert_encoding
+  * @return string converted html; the input unchanged when no source charset is known or the conversion fails
+  * @throws Exception on an unknown $mode or when the requested conversion function is not available
+  */
+ public static function encoding($html, $in = null, $out = null, $mode = 'auto')
+ {
+     $valid = array(
+         'auto',
+         'iconv',
+         'mb_convert_encoding',
+     );
+     if (isset(self::$output_encoding))
+     {
+         $out = self::$output_encoding;
+     }
+     if ( ! isset($out))
+     {
+         $out = 'UTF-8';
+     }
+     if ( ! in_array($mode, $valid))
+     {
+         throw new Exception('invalid mode, mode='.$mode);
+     }
+     // iconv is preferred; mb_convert_encoding is the fallback (or the explicit choice)
+     if (function_exists('iconv') && ($mode == 'auto' || $mode == 'iconv'))
+     {
+         $func = 'iconv';
+     }
+     elseif (function_exists('mb_convert_encoding') && ($mode == 'auto' || $mode == 'mb_convert_encoding'))
+     {
+         $func = 'mb_convert_encoding';
+     }
+     else
+     {
+         throw new Exception('charsetTrans failed, no function');
+     }
+
+     // groups: 1 = tag text up to the charset value, 2 = optional quote, 3 = charset name, 4 = rest of the tag
+     $pattern = '/(<meta[^>]*?charset=([\"\']?))([a-z\d_\-]*)(\2[^>]*?>)/is';
+     if ( ! isset($in))
+     {
+         $in = preg_match($pattern, $html, $m) > 0 ? $m[3] : null;
+         if (empty($in) and function_exists('mb_detect_encoding'))
+         {
+             $in = mb_detect_encoding($html, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1'));
+         }
+     }
+     // nothing declared and nothing detected (mb_detect_encoding() returns false): leave the document alone
+     if (empty($in))
+     {
+         return $html;
+     }
+
+     // kept from the original: content labelled ISO-8859-1 is treated as UTF-8
+     if ($in == 'ISO-8859-1')
+     {
+         $in = 'UTF-8';
+     }
+     $old = error_reporting(error_reporting() & ~E_NOTICE);
+     // the two functions take their arguments in different orders, and only iconv understands //IGNORE
+     $converted = $func == 'iconv' ? iconv($in, $out.'//IGNORE', $html) : mb_convert_encoding($html, $out, $in);
+     error_reporting($old);
+     // a failed conversion yields false; hand back the untouched document instead of an empty string
+     if ($converted === false)
+     {
+         return $html;
+     }
+     // make the declared charset match the new encoding; ${1} keeps a leading digit of $out out of the backreference
+     return preg_replace($pattern, '${1}'.$out.'${4}', $converted, 1);
+ }
+}
diff --git a/vendor/owner888/phpspider/core/selector.php b/vendor/owner888/phpspider/core/selector.php
new file mode 100644
index 0000000..f17cff4
--- /dev/null
+++ b/vendor/owner888/phpspider/core/selector.php
@@ -0,0 +1,588 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider选择器类文件
+//----------------------------------
+
+namespace phpspider\core;
+
+use phpspider\library\phpquery;
+use DOMDocument;
+use DOMXpath;
+use Exception;
+
+class selector
+{
+ /**
+ * 版本号
+ * @var string
+ */
+ const VERSION = '1.0.2';
+ public static $dom = null;
+ public static $dom_auth = '';
+ public static $xpath = null;
+ public static $error = null;
+
+ /**
+  * Extract content from $html with an xpath, regex or css selector.
+  *
+  * Returns false when $html or $selector is empty, null for an unknown
+  * $selector_type, otherwise whatever the type-specific helper returns.
+  */
+ public static function select($html, $selector, $selector_type = 'xpath')
+ {
+     if (empty($html) || empty($selector))
+     {
+         return false;
+     }
+
+     switch (strtolower($selector_type))
+     {
+         case 'xpath': return self::_xpath_select($html, $selector);
+         case 'regex': return self::_regex_select($html, $selector);
+         case 'css':   return self::_css_select($html, $selector);
+         default:      return null;
+     }
+ }
+
+ /**
+  * Remove every fragment matched by $selector from $html and return the remaining markup.
+  * Returns false for empty input and $html unchanged when nothing matched or the type is unknown.
+  */
+ public static function remove($html, $selector, $selector_type = 'xpath')
+ {
+     if (empty($html) || empty($selector))
+     {
+         return false;
+     }
+     switch (strtolower($selector_type))
+     {
+         case 'xpath': $remove_html = self::_xpath_select($html, $selector, true); break;
+         case 'regex': $remove_html = self::_regex_select($html, $selector, true); break;
+         case 'css':   $remove_html = self::_css_select($html, $selector, true); break;
+         default:      $remove_html = null;
+     }
+     // the helpers return null when nothing matches; str_replace() must only see a string or an array
+     if ($remove_html === null || $remove_html === '' || $remove_html === false)
+     {
+         return $html;
+     }
+     return str_replace($remove_html, "", $html);
+ }
+
+ /**
+  * XPath selector.
+  *
+  * The parsed document is cached in self::$dom / self::$xpath and only rebuilt when
+  * md5($html) differs from the previously loaded markup.
+  * For every matched node the result holds: the src attribute for <img>, the node value
+  * for attribute/text/CDATA nodes, and the inner markup for any other element.
+  *
+  * @param mixed $html     markup to query
+  * @param mixed $selector XPath expression
+  * @param bool  $remove   true: return the full serialization of every matched node
+  * @return mixed one match as a string, several as an array, null when nothing matches
+  *               or the expression is invalid (self::$error is set in that case)
+  * @author seatle
+  * @created time :2016-10-26 12:53
+  */
+ private static function _xpath_select($html, $selector, $remove = false)
+ {
+     if (!is_object(self::$dom))
+     {
+         self::$dom = new DOMDocument();
+     }
+
+     // Parse again only when this is not the HTML loaded by the previous call.
+     if (self::$dom_auth != md5($html))
+     {
+         self::$dom_auth = md5($html);
+         // The XML declaration makes libxml decode the markup as UTF-8; without a charset
+         // hint loadHTML() falls back to ISO-8859-1 and mangles multi-byte text.
+         // Parser complaints about real-world HTML are collected by libxml, not emitted.
+         $previous = libxml_use_internal_errors(true);
+         self::$dom->loadHTML('<?xml encoding="UTF-8">'.$html);
+         libxml_clear_errors();
+         libxml_use_internal_errors($previous);
+         self::$xpath = new DOMXpath(self::$dom);
+     }
+
+     // query() returns false (with a warning, silenced here) for an invalid expression
+     $elements = @self::$xpath->query($selector);
+     if ($elements === false)
+     {
+         self::$error = "the selector in the xpath(\"{$selector}\") syntax errors";
+         // Return null, not false: callers check the result with isset(), which is true
+         // for false, and they cannot use !$value either because a valid "0" is falsy.
+         return null;
+     }
+
+     // Collect one string per matched node.
+     $result = array();
+     foreach ($elements as $element)
+     {
+         $nodeName = $element->nodeName;
+         $nodeType = $element->nodeType; // 1.Element 2.Attribute 3.Text 4.CDATA
+         if ($remove)
+         {
+             // removal needs the complete serialized node
+             $content = self::$dom->saveXml($element);
+         }
+         // <img>: the src attribute is the interesting part
+         elseif ($nodeType == 1 && in_array($nodeName, array('img')))
+         {
+             $content = $element->getAttribute('src');
+         }
+         // attribute, text and CDATA nodes: plain node value
+         elseif ($nodeType == 2 || $nodeType == 3 || $nodeType == 4)
+         {
+             $content = $element->nodeValue;
+         }
+         else
+         {
+             // Keep the inner markup for second-level extraction: serialize the node, then
+             // strip its own opening tag and its own closing tag ("</name>").
+             $tag = preg_quote($nodeName, '#');
+             $content = self::$dom->saveXml($element);
+             $content = preg_replace(array("#^<{$tag}.*>#isU", "#</{$tag}>$#isU"), array('', ''), $content);
+         }
+         $result[] = $content;
+     }
+
+     // no node matched
+     if (empty($result))
+     {
+         return null;
+     }
+     // a single match is returned as a string, several matches as an array
+     return count($result) > 1 ? $result : $result[0];
+ }
+
+ /**
+  * CSS selector.
+  *
+  * The CSS expression is translated by css_to_xpath() and evaluated by
+  * _xpath_select(); the result is whatever _xpath_select() returns.
+  * Example: "div.content > a" is run as "//div[contains(@class,'content')]/a".
+  * Only the constructs css_to_xpath() understands are supported (tags, #id, .class,
+  * [attr], [attr=value], ">" and descendant blanks).
+  *
+  * A former phpQuery based implementation (phpQuery::pq($selector)->html() and
+  * ->remove()) is not used any more.
+  *
+  * @param mixed $html     markup to query
+  * @param mixed $selector CSS selector
+  * @param bool  $remove   passed through to _xpath_select()
+  * @return mixed result of _xpath_select() for the translated query
+  * @author seatle
+  * @created time :2016-10-26 12:53
+  */
+ private static function _css_select($html, $selector, $remove = false)
+ {
+     // step 1: CSS -> XPath
+     // (the translation happens on every call; only the parsed document is cached by _xpath_select())
+     $xpath_query = self::css_to_xpath($selector);
+
+     // step 2: evaluate it; $remove is handed over untouched so that remove() receives
+     // whole nodes instead of extracted content
+     return self::_xpath_select($html, $xpath_query, $remove);
+ }
+
+ /**
+  * Regex selector.
+  * Result shape:
+  *  - no capture group, or $remove is true: the complete matches;
+  *  - one capture group: the captured values;
+  *  - several capture groups: one entry per group (a plain value when the group
+  *    captured once, otherwise the list of its captures).
+  * A result holding a single element is returned as that element instead of an array.
+  *
+  * @param mixed $html     text to search
+  * @param mixed $selector complete PCRE pattern including delimiters
+  * @param bool  $remove   true when called for removal
+  * @return mixed null when nothing matches or the pattern is invalid (self::$error is set)
+  * @author seatle
+  * @created time :2016-10-26 12:53
+  */
+ private static function _regex_select($html, $selector, $remove = false)
+ {
+     if (@preg_match_all($selector, $html, $out) === false)
+     {
+         self::$error = "the selector in the regex(\"{$selector}\") syntax errors";
+         return null;
+     }
+     // $out[0] holds the whole matches; when it is empty nothing matched at all
+     if (empty($out[0]))
+     {
+         return null;
+     }
+
+     $groups = count($out) - 1;
+     if ($remove || $groups == 0)
+     {
+         // removal always works on the complete matched text; so does a pattern without groups
+         $result = $out[0];
+     }
+     elseif ($groups == 1)
+     {
+         $result = $out[1];
+     }
+     else
+     {
+         // several groups: one entry per group, unwrapped when the group captured a single value
+         $result = array();
+         for ($i = 1; $i <= $groups; $i++)
+         {
+             $result[] = count($out[$i]) > 1 ? $out[$i] : $out[$i][0];
+         }
+     }
+
+     return count($result) > 1 ? $result : $result[0];
+ }
+
+ public static function find_all($html, $selector)
+ { // Unimplemented stub: the body is empty, so every call returns null.
+ }
+
+
+ /**
+  * Translate a CSS selector into an XPath expression.
+  *
+  * Supported: tag names and "*", #id (exact), .class (substring of @class), [attr], [attr=value],
+  * [attr^=value] (prefix), [attr*=value] (substring), [attr$=value] (suffix), ">" and the
+  * descendant blank. "~", "+" and ":pseudo" tokens are accepted but add nothing to the query.
+  * Attribute values that contain both quote characters cannot be expressed and are not supported.
+  * Example: "div.content > a[href^=http]" gives
+  *          "//div[contains(@class,'content')]/a[starts-with(@href,'http')]".
+  *
+  * @param string $selectors CSS selector
+  * @return string XPath expression
+  * @throws Exception on a token that cannot be translated (this used to exit() the process)
+  */
+ public static function css_to_xpath($selectors)
+ {
+     // parse_selector() delivers the token list; a non-empty list starts with " " or ">"
+     $queries = self::parse_selector($selectors);
+     $delimiter_before = false;
+     $xquery = '';
+     foreach($queries as $s)
+     {
+         // a predicate that directly follows "/" or "//" needs "*" as its node test
+         $any = $delimiter_before ? '*' : '';
+         // TAG
+         $is_tag = preg_match('@^[\w|\||-]+$@', $s) || $s == '*';
+         if ($is_tag)
+         {
+             $xquery .= $s;
+         }
+         // ID: exact match
+         else if ($s[0] == '#')
+         {
+             $xquery .= $any."[@id='".substr($s, 1)."']";
+         }
+         // CLASSES: substring match
+         else if ($s[0] == '.')
+         {
+             $xquery .= $any."[contains(@class,'".substr($s, 1)."')]";
+         }
+         // ATTRIBUTES
+         else if ($s[0] == '[')
+         {
+             // strip side brackets, then split once so that the value may contain '='
+             $parts = explode('=', trim($s, ']['), 2);
+             $attr = $parts[0];
+             if (!isset($parts[1]))
+             {
+                 // attr without specified value
+                 $xquery .= $any."[@{$attr}]";
+             }
+             else
+             {
+                 $value = trim($parts[1], "'\"");
+                 // XPath 1.0 literals have no escapes: use double quotes when the value holds a "'"
+                 $quoted = strpos($value, "'") === false ? "'{$value}'" : "\"{$value}\"";
+                 // a trailing ^, * or $ on the name is the fuzzy-match operator
+                 $op = ($attr !== '' && self::is_regexp($attr)) ? substr($attr, -1) : '';
+                 $attr = $op === '' ? $attr : substr($attr, 0, -1);
+                 if ($op === '^')
+                 {
+                     $xquery .= $any."[starts-with(@{$attr},{$quoted})]";
+                 }
+                 else if ($op === '*')
+                 {
+                     $xquery .= $any."[contains(@{$attr},{$quoted})]";
+                 }
+                 else if ($op === '$')
+                 {
+                     // no ends-with() in XPath 1.0: compare the tail of the attribute value
+                     $xquery .= $any."[substring(@{$attr},string-length(@{$attr}) - string-length({$quoted}) + 1)={$quoted}]";
+                 }
+                 else
+                 {
+                     $xquery .= $any."[@{$attr}={$quoted}]";
+                 }
+             }
+         }
+         // "~" / "+" sibling combinators and ":" pseudo classes are recognised but not translated
+         else if ($s[0] == '~' || $s[0] == '+' || $s[0] == ':')
+         {
+         }
+         // DIRECT DESCENDANDS (">") and ALL DESCENDANDS (" ")
+         else if ($s == '>' || $s == ' ')
+         {
+             $xquery .= $s == '>' ? '/' : '//';
+             $delimiter_before = 2;
+         }
+         // ERRORS
+         else
+         {
+             throw new Exception("Unrecognized token '$s'");
+         }
+         // becomes true only for the token that directly follows a delimiter
+         $delimiter_before = $delimiter_before === 2;
+     }
+     return $xquery;
+ }
+
+ /**
+ * @access private
+ */
+ public static function parse_selector($query)
+ {
+ $query = trim( preg_replace( '@\s+@', ' ', preg_replace('@\s*(>|\\+|~)\s*@', '\\1', $query) ) );
+ $queries = array();
+ if ( !$query )
+ {
+ return $queries;
+ }
+
+ $special_chars = array('>',' ');
+ $special_chars_mapping = array();
+ $strlen = mb_strlen($query);
+ $class_chars = array('.', '-');
+ $pseudo_chars = array('-');
+ $tag_chars = array('*', '|', '-');
+ // split multibyte string
+ // http://code.google.com/p/phpquery/issues/detail?id=76
+ $_query = array();
+ for ( $i=0; $i<$strlen; $i++ )
+ {
+ $_query[] = mb_substr($query, $i, 1);
+ }
+ $query = $_query;
+ // it works, but i dont like it...
+ $i = 0;
+ while( $i < $strlen )
+ {
+ $c = $query[$i];
+ $tmp = '';
+ // TAG
+ if ( self::is_char($c) || in_array($c, $tag_chars) )
+ {
+ while(isset($query[$i]) && (self::is_char($query[$i]) || in_array($query[$i], $tag_chars)))
+ {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $queries[] = $tmp;
+ }
+ // IDs
+ else if ( $c == '#' )
+ {
+ $i++;
+ while( isset($query[$i]) && (self::is_char($query[$i]) || $query[$i] == '-') )
+ {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $queries[] = '#'.$tmp;
+ }
+ // SPECIAL CHARS
+ else if ( in_array($c, $special_chars) )
+ {
+ $queries[] = $c;
+ $i++;
+ // MAPPED SPECIAL MULTICHARS
+ // } else if ( $c.$query[$i+1] == '//') {
+ // $return[] = ' ';
+ // $i = $i+2;
+ }
+ // MAPPED SPECIAL CHARS
+ else if ( isset($special_chars_mapping[$c]))
+ {
+ $queries[] = $special_chars_mapping[$c];
+ $i++;
+ }
+ // COMMA
+ else if ( $c == ',' )
+ {
+ $i++;
+ while( isset($query[$i]) && $query[$i] == ' ')
+ {
+ $i++;
+ }
+ }
+ // CLASSES
+ else if ($c == '.')
+ {
+ while( isset($query[$i]) && (self::is_char($query[$i]) || in_array($query[$i], $class_chars)))
+ {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $queries[] = $tmp;
+ }
+ // ~ General Sibling Selector
+ else if ($c == '~')
+ {
+ $space_allowed = true;
+ $tmp .= $query[$i++];
+ while( isset($query[$i])
+ && (self::is_char($query[$i])
+ || in_array($query[$i], $class_chars)
+ || $query[$i] == '*'
+ || ($query[$i] == ' ' && $space_allowed)
+ ))
+ {
+ if ($query[$i] != ' ')
+ {
+ $space_allowed = false;
+ }
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $queries[] = $tmp;
+ }
+ // + Adjacent sibling selectors
+ else if ($c == '+')
+ {
+ $space_allowed = true;
+ $tmp .= $query[$i++];
+ while( isset($query[$i])
+ && (self::is_char($query[$i])
+ || in_array($query[$i], $class_chars)
+ || $query[$i] == '*'
+ || ($space_allowed && $query[$i] == ' ')
+ ))
+ {
+ if ($query[$i] != ' ')
+ $space_allowed = false;
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $queries[] = $tmp;
+ }
+ // ATTRS
+ else if ($c == '[')
+ {
+ $stack = 1;
+ $tmp .= $c;
+ while( isset($query[++$i]))
+ {
+ $tmp .= $query[$i];
+ if ( $query[$i] == '[')
+ {
+ $stack++;
+ }
+ else if ( $query[$i] == ']')
+ {
+ $stack--;
+ if (! $stack )
+ {
+ break;
+ }
+ }
+ }
+ $queries[] = $tmp;
+ $i++;
+ }
+ // PSEUDO CLASSES
+ else if ($c == ':')
+ {
+ $stack = 1;
+ $tmp .= $query[$i++];
+ while( isset($query[$i]) && (self::is_char($query[$i]) || in_array($query[$i], $pseudo_chars)))
+ {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ // with arguments ?
+ if ( isset($query[$i]) && $query[$i] == '(')
+ {
+ $tmp .= $query[$i];
+ $stack = 1;
+ while( isset($query[++$i]))
+ {
+ $tmp .= $query[$i];
+ if ( $query[$i] == '(')
+ {
+ $stack++;
+ }
+ else if ( $query[$i] == ')')
+ {
+ $stack--;
+ if (! $stack )
+ {
+ break;
+ }
+ }
+ }
+ $queries[] = $tmp;
+ $i++;
+ }
+ else
+ {
+ $queries[] = $tmp;
+ }
+ }
+ else
+ {
+ $i++;
+ }
+ }
+
+ if (isset($queries[0]))
+ {
+ if (isset($queries[0][0]) && $queries[0][0] == ':')
+ {
+ array_unshift($queries, '*');
+ }
+ if ($queries[0] != '>')
+ {
+ array_unshift($queries, ' ');
+ }
+ }
+
+ return $queries;
+ }
+
+ public static function is_char($char)
+ { // preg_match() result: 1 when $char contains a character matched by \w, otherwise 0
+ return preg_match('@\w@', $char);
+ }
+
+ /**
+  * Tell whether an attribute name ends with a CSS fuzzy-match operator character:
+  * ^ (prefix match), * (substring match) or $ (suffix match).
+  * The last byte is read with substr(), so an empty name simply means "no operator"
+  * and a multi-byte name is not indexed with a character count.
+  *
+  * @param string $pattern attribute name as written in front of the '=' sign
+  * @return bool
+  * @access private
+  */
+ protected static function is_regexp($pattern)
+ {
+     return in_array(substr((string) $pattern, -1), array('^', '*', '$'), true);
+ }
+}
diff --git a/vendor/owner888/phpspider/core/util.php b/vendor/owner888/phpspider/core/util.php
new file mode 100644
index 0000000..6d6f811
--- /dev/null
+++ b/vendor/owner888/phpspider/core/util.php
@@ -0,0 +1,936 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider实用函数集合类文件
+//----------------------------------
+
+namespace phpspider\core;
+// 引入PATH_DATA
+require_once __DIR__ . '/constants.php';
+
+class util
+{
+ /**
+  * File based lock with a staleness timeout.
+  * Returns true when a lock file younger than $lock_timeout seconds exists (lock is held).
+  * Otherwise a stale file is deleted, a new one holding time() is written and false is
+  * returned: the caller owns the lock now. Check and write are not atomic.
+  *
+  * if (!util::lock('statistics_offer'))
+  * {
+  *     ...
+  *     util::unlock('statistics_offer');
+  * }
+  * else
+  * {
+  *     echo "process has been locked\n";
+  * }
+  * @param mixed $lock_name    file name (without ".lock") below PATH_DATA/lock/
+  * @param int   $lock_timeout age in seconds after which a lock counts as stale
+  * @return bool true = already locked, false = lock acquired by this call
+  * @author seatle
+  * @created time :2016-02-18 14:28
+  */
+ public static function lock($lock_name, $lock_timeout = 600)
+ {
+     $lock_file = PATH_DATA."/lock/{$lock_name}.lock";
+     // read the local file directly: get_file() would first try to fetch the path with curl
+     $lock = is_file($lock_file) ? (int) @file_get_contents($lock_file) : 0;
+     if ($lock)
+     {
+         if (time() - $lock < $lock_timeout)
+         {
+             return true;
+         }
+         @unlink($lock_file);
+     }
+     util::put_file($lock_file, time());
+     return false;
+ }
+
+ public static function unlock($lock_name)
+ { // deletes PATH_DATA/lock/<lock_name>.lock, the same path lock() writes
+ unlink(PATH_DATA."/lock/{$lock_name}.lock");
+ }
+
+ /**
+  * Format a number of seconds as a readable duration.
+  *
+  * The value is split into years (31556926 s), days, hours, minutes and seconds.
+  * Years are computed but never printed: the output starts with the days that
+  * remain after whole years were taken off.
+  *
+  * @param mixed $time   seconds; a non-numeric value yields false
+  * @param bool  $is_log true: "1d 2h 3m 4s", false: "1 days 2 hours 3 minutes"
+  * @return string|false
+  */
+ public static function time2second($time, $is_log = true)
+ {
+     if (!is_numeric($time))
+     {
+         return false;
+     }
+
+     // unit => length in seconds, largest first
+     $spans = array(
+         "years" => 31556926,
+         "days" => 86400,
+         "hours" => 3600,
+         "minutes" => 60,
+     );
+     $value = array("years" => 0, "days" => 0, "hours" => 0, "minutes" => 0, "seconds" => 0);
+     // peel off whole units; the remainder is carried on to the next smaller unit
+     foreach ($spans as $unit => $length)
+     {
+         if ($time >= $length)
+         {
+             $value[$unit] = floor($time/$length);
+             $time = ($time%$length);
+         }
+     }
+     $value["seconds"] = floor($time);
+
+     // floor() results are floats; they are printed without a fractional part
+     if ($is_log)
+     {
+         return $value["days"] ."d ". $value["hours"] ."h ". $value["minutes"] ."m ".$value["seconds"]."s";
+     }
+     else
+     {
+         return $value["days"] ." days ". $value["hours"] ." hours ". $value["minutes"] ." minutes";
+     }
+ }
+
+ // List the dates ('Y-m-d') from $day_sta to $day_end (default: today) in steps of $range seconds.
+ public static function get_days($day_sta, $day_end = true, $range = 86400)
+ {
+     $last_day = ($day_end === true) ? date('Y-m-d') : $day_end;
+     $days = array();
+     foreach (range(strtotime($day_sta), strtotime($last_day), $range) as $timestamp) $days[] = date('Y-m-d', $timestamp);
+     return $days;
+ }
+
+ /**
+  * Count the lines of a file; empty lines are counted too.
+  *
+  * @param mixed $filepath path of the file to read
+  * @return int number of "\n"-separated lines, 0 when the file cannot be opened
+  * @author seatle
+  * @created time :2016-03-31 21:54
+  */
+ public static function get_file_line($filepath)
+ {
+     $line = 0;
+     $fp = fopen($filepath, 'r');
+     if (!$fp)
+     {
+         return 0;
+     }
+     // stream_get_line() gives "" for an empty line and false at end of file: compare with false explicitly
+     while (stream_get_line($fp, 8192, "\n") !== false) {
+         $line++;
+     }
+     fclose($fp);
+     return $line;
+ }
+
+ /**
+  * Map a unique key to a zero-padded shard (table) number.
+  *
+  * The last two hex digits of sha1(lower-cased key), a value in 0..255, are taken
+  * modulo $table_num and left-padded with "0" to 2 digits (3 when $table_num > 100).
+  *
+  * @param mixed $item_value unique key of the row
+  * @param int   $table_num  number of tables
+  * @return string padded table number
+  * @author seatle
+  * @created time :2015-10-22 23:25
+  */
+ public static function get_table_num($item_value, $table_num = 100)
+ {
+     $digest = sha1(strtolower($item_value));
+     // hexdec() turns the two trailing hex digits into an integer for the modulo
+     $bucket = hexdec(substr($digest, -2)) % $table_num;
+     $width = $table_num > 100 ? 3 : 2;
+     return str_pad($bucket, $width, "0", STR_PAD_LEFT);
+ }
+
+ /**
+  * Build the name of the shard table that stores a key: "<table_name>_<number>".
+  *
+  * The number is the last two hex digits of sha1(lower-cased key), a value in 0..255, taken
+  * modulo $table_num and left-padded with "0" to 2 digits (3 when $table_num > 100).
+  *
+  * @param mixed $table_name base table name
+  * @param mixed $item_value unique key of the row
+  * @param int   $table_num  number of tables
+  * @return string table name with the padded number appended
+  * @author seatle
+  * @created time :2015-10-22 23:25
+  */
+ public static function get_table_name($table_name, $item_value, $table_num = 100)
+ {
+     $digest = sha1(strtolower($item_value));
+     $bucket = hexdec(substr($digest, -2)) % $table_num;
+     $width = $table_num > 100 ? 3 : 2;
+     return $table_name."_".str_pad($bucket, $width, "0", STR_PAD_LEFT);
+ }
+
+ // Current memory usage of this PHP process as reported by memory_get_usage(),
+ // formatted by format_bytes().
+ public static function memory_get_usage()
+ {
+     return self::format_bytes(memory_get_usage());
+ }
+
+ // Peak memory usage of this PHP process as reported by memory_get_peak_usage(),
+ // formatted by format_bytes().
+ public static function memory_get_peak_usage()
+ {
+     return self::format_bytes(memory_get_peak_usage());
+ }
+
+ // Format a byte count as "<number> <unit>"; sizes below 1 (including 0) are reported in "b" without calling log().
+ public static function format_bytes($size)
+ {
+     $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');
+     return round($size / pow(1024, $i = $size >= 1 ? min(5, (int) floor(log($size, 1024))) : 0), 2) . ' ' . $unit[$i];
+ }
+
+ /**
+  * Estimate the size of a value from the length of its print_r() dump.
+  *
+  * Every newline followed by indentation blanks is removed before measuring, so the
+  * figure describes the printed text, not the memory PHP really uses.
+  *
+  * @param mixed $arr value to measure
+  * @return string length of the dump, formatted by format_bytes()
+  */
+ public static function array_size($arr)
+ {
+     // print_r(..., true) returns the dump instead of printing it
+     $dump = print_r($arr, true);
+     $compact = preg_replace("/\n +/", "", $dump);
+     return self::format_bytes(strlen($compact));
+ }
+
+ /**
+  * Random string of decimal digits (it may start with "0").
+  * @param int $num number of digits
+  * @return string
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public static function rand_num($num = 7)
+ {
+     $digits = "";
+     // append one mt_rand() digit at a time until the requested length is reached
+     while (strlen($digits) < $num)
+     {
+         $digits .= mt_rand(0, 9);
+     }
+     return $digits;
+ }
+
+ /**
+  * Random string of lower-case letters and digits.
+  * @param int $num length of the string
+  * @return string
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public static function rand_str($num = 10)
+ {
+     $chars = 'abcdefghijklmnopqrstuvwxyz0123456789';
+     $string = "";
+     for ($i = 0; $i < $num; $i ++)
+     {
+         // the upper bound is the last valid index; strlen($chars) itself points past the end
+         $string .= substr($chars, rand(0, strlen($chars) - 1), 1);
+     }
+     return $string;
+ }
+
+ /**
+  * Convert Chinese text to pinyin.
+  *
+  * The text is converted to GBK and looked up two bytes at a time in the table loaded
+  * from PATH_DATA/pinyin.dat into the global $pinyins; ASCII letters and digits are
+  * copied, everything else is dropped.
+  *
+  * @param mixed $str     UTF-8 text
+  * @param int   $ishead  non-zero: only the first letter of every syllable
+  * @param int   $isclose 0: release the global lookup table after the call
+  * @static
+  * @access public
+  * @return string
+  */
+ public static function pinyin($str, $ishead = 0, $isclose = 1)
+ {
+     $str = mb_convert_encoding($str, "gbk", "utf-8");
+     global $pinyins;
+     $restr = '';
+     $str = trim($str);
+     $slen = strlen($str);
+     if ($slen < 2)
+     {
+         return $str;
+     }
+     // count(null) is a TypeError on PHP 8, so the table is tested with empty()
+     if (empty($pinyins))
+     {
+         $pinyins = array();
+         $fp = @fopen(PATH_DATA . '/pinyin.dat', 'r');
+         // each line: two GBK bytes, one separator byte, then the pinyin
+         while ($fp && ($line = fgets($fp)) !== false)
+         {
+             $line = trim($line);
+             if (strlen($line) > 3)
+             {
+                 $pinyins[substr($line, 0, 2)] = substr($line, 3);
+             }
+         }
+         if ($fp)
+         {
+             fclose($fp);
+         }
+     }
+     // walk the GBK bytes: a byte above 0x80 starts a two-byte character
+     for ($i = 0; $i < $slen; $i ++)
+     {
+         if (ord($str[$i]) > 0x80)
+         {
+             // a lead byte at the very end has no trail byte: substr() then yields it alone
+             $c = substr($str, $i, 2);
+             $i ++;
+             if (isset($pinyins[$c]))
+             {
+                 $restr .= $ishead == 0 ? $pinyins[$c] : $pinyins[$c][0];
+             }
+         }
+         else if (preg_match("/[a-z0-9]/i", $str[$i]))
+         {
+             $restr .= $str[$i];
+         }
+     }
+     if ($isclose == 0)
+     {
+         // unset() on the local alias would leave the global table in place
+         unset($GLOBALS['pinyins']);
+     }
+     return $restr;
+ }
+
+ /**
+  * Index letter of a string: a leading ASCII letter or digit is returned as written, a
+  * leading Chinese character gives its upper-case pinyin initial (looked up via GBK).
+  * @param mixed $s0 UTF-8 string
+  * @return string|int the letter, or 0 when the string is empty or has no known initial
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public static function letter_first($s0)
+ {
+     $first = substr((string) $s0, 0, 1); // replaces the $s0{0} offset syntax, which PHP 8 no longer parses
+     $firstchar_ord = ord(strtoupper($first)); // 65..90 = A..Z (91 would be "["), 48..57 = 0..9
+     if (($firstchar_ord >= 65 and $firstchar_ord <= 90) or ($firstchar_ord >= 48 and $firstchar_ord <= 57)) return $first;
+     $s = mb_convert_encoding($s0, "gbk", "utf-8");
+     $asc = ord(substr($s, 0, 1)) * 256 + ord(substr($s, 1, 1)) - 65536;
+     if ($asc >= -20319 and $asc <= -20284) return "A";
+     if ($asc >= -20283 and $asc <= -19776) return "B";
+     if ($asc >= -19775 and $asc <= -19219) return "C";
+     if ($asc >= -19218 and $asc <= -18711) return "D";
+     if ($asc >= -18710 and $asc <= -18527) return "E";
+     if ($asc >= -18526 and $asc <= -18240) return "F";
+     if ($asc >= -18239 and $asc <= -17923) return "G";
+     if ($asc >= -17922 and $asc <= -17418) return "H";
+     if ($asc >= -17417 and $asc <= -16475) return "J";
+     if ($asc >= -16474 and $asc <= -16213) return "K";
+     if ($asc >= -16212 and $asc <= -15641) return "L";
+     if ($asc >= -15640 and $asc <= -15166) return "M";
+     if ($asc >= -15165 and $asc <= -14923) return "N";
+     if ($asc >= -14922 and $asc <= -14915) return "O";
+     if ($asc >= -14914 and $asc <= -14631) return "P";
+     if ($asc >= -14630 and $asc <= -14150) return "Q";
+     if ($asc >= -14149 and $asc <= -14091) return "R";
+     if ($asc >= -14090 and $asc <= -13319) return "S";
+     if ($asc >= -13318 and $asc <= -12839) return "T";
+     if ($asc >= -12838 and $asc <= -12557) return "W";
+     if ($asc >= -12556 and $asc <= -11848) return "X";
+     if ($asc >= -11847 and $asc <= -11056) return "Y";
+     if ($asc >= -11055 and $asc <= -10247) return "Z";
+     return 0; // null
+ }
+
+ /**
+  * Timestamp of 23:59:59 on the day that lies $day days before today.
+  * @param mixed $day number of days to go back (converted with intval())
+  * @return int unix timestamp
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public static function getxtime($day)
+ {
+     list($month, $today, $year) = explode(' ', date("m d y"));
+     // a day number below 1 is normalised by mktime() into the previous month(s)
+     return mktime(23, 59, 59, $month, $today - intval($day), $year);
+ }
+
+ /**
+ * 读文件
+ */
+ public static function get_file($url, $timeout = 10)
+ {
+ if (function_exists('curl_init'))
+ {
+ $ch = curl_init();
+ curl_setopt($ch, CURLOPT_URL, $url);
+ curl_setopt($ch, CURLOPT_HEADER, 0);
+ curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
+ curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+ curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
+ $content = curl_exec($ch);
+ curl_close($ch);
+ if ($content) return $content;
+ }
+ $ctx = stream_context_create(array('http' => array('timeout' => $timeout)));
+ $content = @file_get_contents($url, 0, $ctx);
+ if ($content) return $content;
+ return false;
+ }
+
+ /**
+ * 写文件,如果文件目录不存在,则递归生成
+ */
+ public static function put_file($file, $content, $flag = 0)
+ {
+ $pathinfo = pathinfo($file);
+ if (!empty($pathinfo['dirname']))
+ {
+ if (file_exists($pathinfo['dirname']) === false)
+ {
+ if (@mkdir($pathinfo['dirname'], 0777, true) === false)
+ {
+ return false;
+ }
+ }
+ }
+ if ($flag === FILE_APPEND)
+ {
+ // 多个php-fpm写一个文件的时候容易丢失,要加锁
+ //return @file_put_contents($file, $content, FILE_APPEND|LOCK_EX);
+ return @file_put_contents($file, $content, FILE_APPEND);
+ }
+ else
+ {
+ return @file_put_contents($file, $content, LOCK_EX);
+ }
+ }
+
+ /**
+ * 检查路径是否存在,不存在则递归生成路径
+ *
+ * @param mixed $path 路径
+ * @static
+ * @access public
+ * @return bool or string
+ */
+ public static function path_exists($path)
+ {
+ $pathinfo = pathinfo($path . '/tmp.txt');
+ if (!empty($pathinfo['dirname']))
+ {
+ if (file_exists($pathinfo['dirname']) === false)
+ {
+ if (mkdir($pathinfo['dirname'], 0777, true) === false)
+ {
+ return false;
+ }
+ }
+ }
+ return $path;
+ }
+
+ /**
+  * Delete a directory together with everything inside it.
+  *
+  * @param mixed $dir directory path
+  * @return bool true when the directory itself was removed, false otherwise
+  * @author seatle
+  * @created time :2016-09-18 10:17
+  */
+ public static function deldir($dir)
+ {
+     $dh = @opendir($dir);
+     if ($dh === false)
+     {
+         return false;
+     }
+     // readdir() returns false at the end of the listing; an entry called "0" is falsy
+     // as well, so the comparison has to be strict or the loop stops early.
+     while (($file = readdir($dh)) !== false)
+     {
+         if ($file == "." || $file == "..")
+         {
+             continue;
+         }
+         $fullpath = $dir."/".$file;
+         // symlinks are unlinked, never followed
+         if (is_dir($fullpath) && !is_link($fullpath))
+         {
+             self::deldir($fullpath);
+         }
+         else
+         {
+             unlink($fullpath);
+         }
+     }
+     closedir($dh);
+
+     // remove the now empty directory itself
+     return rmdir($dir);
+ }
+
+ /**
+  * Recursively change the mode of a path.
+  *
+  * @param mixed $path     file or directory
+  * @param mixed $filemode mode passed to chmod(), e.g. 0755
+  * @return bool false as soon as an entry is a symbolic link or a chmod() fails
+  */
+ public static function chmodr($path, $filemode)
+ {
+     if (!is_dir($path))
+     {
+         return @chmod($path, $filemode);
+     }
+
+     $dh = @opendir($path);
+     if ($dh === false)
+     {
+         return false;
+     }
+     $ok = true;
+     while ($ok && ($file = readdir($dh)) !== false)
+     {
+         if ($file == '.' || $file == '..')
+         {
+             continue;
+         }
+         $fullpath = $path . '/' . $file;
+         if (is_link($fullpath))
+         {
+             // a symbolic link aborts the whole run
+             $ok = false;
+         }
+         else
+         {
+             // plain files are chmod()ed by the recursive call itself
+             $ok = self::chmodr($fullpath, $filemode);
+         }
+     }
+     // the handle is closed on every path, also after a failure
+     closedir($dh);
+     if (!$ok)
+     {
+         return false;
+     }
+     return @chmod($path, $filemode);
+ }
+
+ /**
+ * 数组格式化为CSV
+ *
+ * @param mixed $data
+ * @return void
+ * @author seatle
+ * @created time :2016-07-29 11:32
+ */
+ public static function format_csv($data)
+ {
+ foreach ($data as $k=>$v)
+ {
+ $v = str_replace(",", "", $v);
+ $v = str_replace(",", "", $v);
+ $data[$k] = $v;
+ }
+ return implode(",", $data);
+ }
+
+ /**
+  * Check whether a string is UTF-8.
+  *
+  * The string is converted UTF-8 -> UTF-32 -> UTF-8; it is reported as UTF-8 when
+  * it comes back from that round trip unchanged.
+  *
+  * @param mixed $str
+  * @return bool
+  */
+ public static function is_utf8($str)
+ {
+     $as_utf32 = mb_convert_encoding($str, "UTF-32", "UTF-8");
+     $round_trip = mb_convert_encoding($as_utf32, "UTF-8", "UTF-32");
+     // strict comparison, exactly one boolean result
+     return $str === $round_trip;
+ }
+
+ /**
+  * Guess the encoding of a string with mb_detect_encoding().
+  * Candidates in this order: UTF-8, GBK, GB2312, LATIN1, ASCII, BIG5.
+  * @return string lower-cased encoding name, "" when no candidate fits
+  */
+ public static function get_encoding($string)
+ {
+     $candidates = array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5');
+     return strtolower(mb_detect_encoding($string, $candidates));
+ }
+
+ /**
+  * Convert a string, or all keys and string values of a nested array, to another encoding.
+  * Uses iconv() recursively; the data is no longer exported as PHP code and run through eval().
+  * @return mixed converted copy; values that are neither array nor string are returned unchanged
+  */
+ public static function array_iconv($arr, $from_encoding, $to_encoding)
+ {
+     if (!is_array($arr)) return is_string($arr) ? iconv($from_encoding, $to_encoding.'//IGNORE', $arr) : $arr;
+     $out = array();
+     foreach ($arr as $k => $v) $out[self::array_iconv($k, $from_encoding, $to_encoding)] = self::array_iconv($v, $from_encoding, $to_encoding);
+     return $out;
+ }
+
+ /**
+ * 从普通时间返回Linux时间截(strtotime中文处理版)
+ * @parem string $dtime
+ * @return int
+ */
+ public static function cn_strtotime($dtime)
+ {
+ if (!preg_match("/[^0-9]/", $dtime))
+ {
+ return $dtime;
+ }
+ $dtime = trim($dtime);
+ $dt = Array(1970, 1, 1, 0, 0, 0);
+ $dtime = preg_replace("/[\r\n\t]|日|秒/", " ", $dtime);
+ $dtime = str_replace("年", "-", $dtime);
+ $dtime = str_replace("月", "-", $dtime);
+ $dtime = str_replace("时", ":", $dtime);
+ $dtime = str_replace("分", ":", $dtime);
+ $dtime = trim(preg_replace("/[ ]{1,}/", " ", $dtime));
+ $ds = explode(" ", $dtime);
+ $ymd = explode("-", $ds[0]);
+ if (!isset($ymd[1]))
+ {
+ $ymd = explode(".", $ds[0]);
+ }
+ if (isset($ymd[0]))
+ {
+ $dt[0] = $ymd[0];
+ }
+ if (isset($ymd[1])) $dt[1] = $ymd[1];
+ if (isset($ymd[2])) $dt[2] = $ymd[2];
+ if (strlen($dt[0]) == 2) $dt[0] = '20' . $dt[0];
+ if (isset($ds[1]))
+ {
+ $hms = explode(":", $ds[1]);
+ if (isset($hms[0])) $dt[3] = $hms[0];
+ if (isset($hms[1])) $dt[4] = $hms[1];
+ if (isset($hms[2])) $dt[5] = $hms[2];
+ }
+ foreach ($dt as $k => $v)
+ {
+ $v = preg_replace("/^0{1,}/", '', trim($v));
+ if ($v == '')
+ {
+ $dt[$k] = 0;
+ }
+ }
+ $mt = mktime($dt[3], $dt[4], $dt[5], $dt[1], $dt[2], $dt[0]);
+ if (!empty($mt))
+ {
+ return $mt;
+ }
+ else
+ {
+ return strtotime($dtime);
+ }
+ }
+
+ public static function cn_substr($string, $length = 80, $etc = '...', $count_words = true)
+ {
+ mb_internal_encoding("UTF-8");
+ if ($length == 0) return '';
+ if (strlen($string) <= $length) return $string;
+ preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);
+ if ($count_words)
+ {
+ $j = 0;
+ $wordscut = "";
+ for ($i = 0; $i < count($info[0]); $i ++)
+ {
+ $wordscut .= $info[0][$i];
+ if (ord($info[0][$i]) >= 128)
+ {
+ $j = $j + 2;
+ }
+ else
+ {
+ $j = $j + 1;
+ }
+ if ($j >= $length)
+ {
+ return $wordscut . $etc;
+ }
+ }
+ return join('', $info[0]);
+ }
+ return join("", array_slice($info[0], 0, $length)) . $etc;
+ }
+
+ /**
+  * File extension in lower case.
+  *
+  * The text after the last "." is returned; a name without any "." comes back
+  * whole (lower-cased).
+  *
+  * @param mixed $file_name file name
+  * @return string
+  */
+ public static function get_extension($file_name)
+ {
+     $pieces = explode('.', $file_name);
+     // end() picks the last piece, the part behind the final dot
+     return strtolower(end($pieces));
+ }
+
+ // 获取 Url 跳转后的真实地址
+ public static function getrealurl($url)
+ {
+ if (empty($url))
+ {
+ return $url;
+ }
+ $header = get_headers($url, 1);
+ if (empty($header[0]) || empty($header[1]))
+ {
+ return $url;
+ }
+ if (strpos($header[0], '301') || strpos($header[0], '302'))
+ {
+ if (empty($header['Location']))
+ {
+ return $url;
+ }
+ if (is_array($header['Location']))
+ {
+ return $header['Location'][count($header['Location']) - 1];
+ }
+ else
+ {
+ return $header['Location'];
+ }
+ }
+ else
+ {
+ return $url;
+ }
+ }
+
+ // 解压服务器用 Content-Encoding:gzip 压缩过的数据
+ public static function gzdecode($data)
+ {
+ $flags = ord(substr($data, 3, 1));
+ $headerlen = 10;
+ $extralen = 0;
+ $filenamelen = 0;
+ if ($flags & 4)
+ {
+ $extralen = unpack('v', substr($data, 10, 2));
+ $extralen = $extralen[1];
+ $headerlen += 2 + $extralen;
+ }
+ if ($flags & 8) // Filename
+ $headerlen = strpos($data, chr(0), $headerlen) + 1;
+ if ($flags & 16) // Comment
+ $headerlen = strpos($data, chr(0), $headerlen) + 1;
+ if ($flags & 2) // CRC at end of file
+ $headerlen += 2;
+ $unpacked = @gzinflate(substr($data, $headerlen));
+ if ($unpacked === FALSE) $unpacked = $data;
+ return $unpacked;
+ }
+
+ /**
+ * 数字金额转换为中文
+ * @param string|integer|float $num 目标数字
+ * @param boolean $sim 使用小写(默认)
+ * @return string
+ */
+ public static function number2chinese($num, $sim = FALSE)
+ {
+ if (!is_numeric($num)) return '含有非数字非小数点字符!';
+ $char = $sim ? array('零', '一', '二', '三', '四', '五', '六', '七', '八', '九') : array('零', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖');
+ $unit = $sim ? array('', '十', '百', '千', '', '万', '亿', '兆') : array('', '拾', '佰', '仟', '', '萬', '億', '兆');
+ $retval = '';
+
+ $num = sprintf("%01.2f", $num);
+
+ list ($num, $dec) = explode('.', $num);
+
+ // 小数部分
+ if ($dec['0'] > 0)
+ {
+ $retval .= "{$char[$dec['0']]}角";
+ }
+ if ($dec['1'] > 0)
+ {
+ $retval .= "{$char[$dec['1']]}分";
+ }
+
+ // 整数部分
+ if ($num > 0)
+ {
+ $retval = "元" . $retval;
+ $f = 1;
+ $str = strrev(intval($num));
+ for ($i = 0, $c = strlen($str); $i < $c; $i ++)
+ {
+ if ($str[$i] > 0)
+ {
+ $f = 0;
+ }
+ if ($f == 1 && $str[$i] == 0)
+ {
+ $out[$i] = "";
+ }
+ else
+ {
+ $out[$i] = $char[$str[$i]];
+ }
+ $out[$i] .= $str[$i] != '0' ? $unit[$i % 4] : '';
+ if ($i > 1 and $str[$i] + $str[$i - 1] == 0)
+ {
+ $out[$i] = '';
+ }
+ if ($i % 4 == 0)
+ {
+ $out[$i] .= $unit[4 + floor($i / 4)];
+ }
+ }
+ $retval = join('', array_reverse($out)) . $retval;
+ }
+ return $retval;
+ }
+
+ public static function colorize($str, $status = "info")
+ { // Wrap $str in the ANSI colour escape selected by $status and append the reset sequence.
+ $out = "";
+ switch ($status)
+ {
+ case 'succ':
+ $out = "\033[32m"; // Green
+ break;
+ case "error":
+ $out = "\033[31m"; // Red
+ break;
+ case "warn":
+ $out = "\033[33m"; // Yellow
+ break;
+ case "note":
+ $out = "\033[34m"; // Blue
+ break;
+ case "debug":
+ $out = "\033[36m"; // Cyan
+ break;
+ default:
+ $out = "\033[0m"; // info: reset sequence, no colour
+ break;
+ }
+ return $out.$str."\033[0m";
+ }
+
+ public static function node_to_array($dom, $node)
+ {
+ if(!is_a( $dom, 'DOMDocument' ) || !is_a( $node, 'DOMNode' ))
+ {
+ return false;
+ }
+
+ $array = array();
+ // Discard empty nodes
+ $localName = trim( $node->localName );
+ if( empty($localName))
+ {
+ return false;
+ }
+ if( XML_TEXT_NODE == $node->nodeType )
+ {
+ return $node->nodeValue;
+ }
+ foreach ($node->attributes as $attr)
+ {
+ $array['@'.$attr->localName] = $attr->nodeValue;
+ }
+ foreach ($node->childNodes as $childNode)
+ {
+ if ( (isset($childNode->childNodes->length) && 1 == $childNode->childNodes->length) &&
+ XML_TEXT_NODE == $childNode->firstChild->nodeType )
+ {
+ $array[$childNode->localName] = $childNode->nodeValue;
+ }
+ else
+ {
+ if( false !== ($a = self::node_to_array( $dom, $childNode)))
+ {
+ $array[$childNode->localName] = $a;
+ }
+ }
+ }
+ return $array;
+ }
+
+ public static function is_win()
+ { // true when the first three characters of PHP_OS, upper-cased, are "WIN"
+ return strtoupper(substr(PHP_OS,0,3))==="WIN";
+ }
+
+ /**
+  * Inverse of http_build_query(): split a query string into name => value pairs.
+  * Values are returned as written (no URL decoding); a parameter without "=" gets "".
+  * @param string $query    full URL, or the bare query string when $is_query is true
+  * @param bool   $is_query true: $query already is the query string
+  * @return array
+  */
+ public static function http_split_query($query, $is_query = false)
+ {
+     if (!$is_query)
+     {
+         $parse_arr = parse_url($query);
+         if (empty($parse_arr['query']))
+         {
+             return array();
+         }
+         $query = $parse_arr['query'];
+     }
+     $params = array();
+     foreach (explode("&", $query) as $val)
+     {
+         // split at the first "=" only, so values may contain "=" themselves
+         $arr = explode("=", $val, 2);
+         if ($arr[0] === '') continue;
+         $params[$arr[0]] = isset($arr[1]) ? $arr[1] : '';
+     }
+     return $params;
+ }
+}
+
+
diff --git a/vendor/owner888/phpspider/core/worker.php b/vendor/owner888/phpspider/core/worker.php
new file mode 100644
index 0000000..b09955f
--- /dev/null
+++ b/vendor/owner888/phpspider/core/worker.php
@@ -0,0 +1,421 @@
+
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// Worker多进程操作类
+//----------------------------------
+
+class worker
+{
+ // Number of worker processes
+ public $count = 0;
+ // Worker id; worker processes are numbered from 1, 0 is used by the master process
+ public $worker_id = 0;
+ // Process ID (pid) of the worker
+ public $worker_pid = 0;
+ // User the process should run as
+ public $user = '';
+ // Process title
+ public $title = '';
+ // Whether each worker process runs only once
+ public $run_once = true;
+ // Whether to output logs
+ public $log_show = false;
+ // Callback invoked when the master process starts
+ public $on_start = false;
+ // Callback invoked when the master process stops
+ public $on_stop = false;
+ // Callback invoked when a worker process starts
+ public $on_worker_start = false;
+ // Callback invoked when a worker process stops
+ public $on_worker_stop = false;
+ // Process ID of the master process
+ protected static $_master_pid = 0;
+ // Process IDs of the worker processes
+ protected static $_worker_pids = array();
+ // Start time of the master / worker process
+ public $time_start = 0;
+ // Run status of the master / worker process [starting|running|shutdown|reload]
+ protected static $_status = "starting";
+
+
+ /**
+ * Record the current process as the master process and install the signal handlers.
+ */
+ public function __construct()
+ {
+ self::$_master_pid = posix_getpid();
+ // Enable ticks; per the original author's note, the parent process can only receive signals once this is declared
+ declare(ticks = 1);
+ $this->install_signal();
+ }
+
+ /**
+  * Install the signal handlers.
+  *
+  * SIGINT (stop), SIGUSR1 (reload) and SIGUSR2 (status) are routed to signal_handler();
+  * SIGPIPE is ignored.
+  *
+  * @return void
+  */
+ protected function install_signal()
+ {
+     $handler = array($this, 'signal_handler');
+     // stop, reload, status
+     foreach (array(SIGINT, SIGUSR1, SIGUSR2) as $signo)
+     {
+         pcntl_signal($signo, $handler, false);
+     }
+     // ignore
+     pcntl_signal(SIGPIPE, SIG_IGN, false);
+     // A handler for dead children is left disabled:
+     // pcntl_signal(SIGCHLD, array($this, 'signal_handler'));
+ }
+
+ /**
+  * Uninstall the signal handlers by setting SIGINT (stop), SIGUSR1 (reload) and
+  * SIGUSR2 (status) to be ignored.
+  *
+  * @return void
+  */
+ protected function uninstall_signal()
+ {
+     foreach (array(SIGINT, SIGUSR1, SIGUSR2) as $signo)
+     {
+         pcntl_signal($signo, SIG_IGN, false);
+     }
+ }
+
+ /**
+  * Signal handler. It is invoked from outside the class, so it has to be public.
+  *
+  * @param int $signal signal number that was received
+  */
+ public function signal_handler($signal) {
+     if ($signal == SIGINT) {
+         // stop: runs in both the master process and the worker processes
+         $this->stop_all();
+     }
+     elseif ($signal == SIGUSR1) {
+         // reload
+         echo "reload\n";
+     }
+     elseif ($signal == SIGUSR2) {
+         // show status
+         echo "status\n";
+     }
+ }
+
+ /**
+ * Run the worker instance as the master process: invoke the on_start callback,
+ * fork $count worker processes, then monitor them.
+ *
+ * Ends in monitor_workers(), which loops until it calls exit().
+ */
+ public function run()
+ {
+ $this->time_start = microtime(true);
+ $this->worker_id = 0;
+ $this->worker_pid = posix_getpid();
+ $this->set_process_title($this->title);
+
+ // Assigned here so that the forked worker processes inherit the value as well
+ if ($this->log_show)
+ {
+ log::$log_show = true;
+ }
+
+ if ($this->on_start)
+ {
+ call_user_func($this->on_start, $this);
+ }
+
+ // Worker ids start at 1; 0 is used by the master process
+ for ($i = 1; $i <= $this->count; $i++)
+ {
+ $this->fork_one_worker($i);
+ }
+ $this->monitor_workers();
+ }
+
+ /**
+ * Fork one child (worker) process.
+ *
+ * In the parent the child's pid is recorded under $worker_id and the method returns.
+ * In the child the on_worker_start callback is run, then stop(), and the process exits with status 0.
+ * If the fork fails an error is logged and the calling process exits.
+ *
+ * @param int $worker_id id assigned to the new worker
+ */
+ public function fork_one_worker($worker_id)
+ {
+ //$sockets = stream_socket_pair(STREAM_PF_UNIX, STREAM_SOCK_STREAM, STREAM_IPPROTO_IP);
+ $pid = pcntl_fork();
+
+ // Master process: record the child's pid
+ if($pid > 0)
+ {
+ self::$_worker_pids[$worker_id] = $pid;
+ }
+ // Child process
+ elseif(0 === $pid)
+ {
+ $this->time_start = microtime(true);
+ $this->worker_id = $worker_id;
+ $this->worker_pid = posix_getpid();
+ $this->set_process_title($this->title);
+ $this->set_process_user($this->user);
+ // Clear the worker pids copied over from the master process
+ self::$_worker_pids = array();
+ //$this->uninstall_signal();
+
+ // Mark the worker process as running
+ self::$_status = "running";
+
+ // Register the shutdown callback used to check for errors (registered inside the child)
+ register_shutdown_function(array($this, 'check_errors'));
+
+ // Invoke the worker start callback if one is set
+ if ($this->on_worker_start)
+ {
+ call_user_func($this->on_worker_start, $this);
+ }
+
+ // Stop the current worker instance
+ $this->stop();
+ // 0 means a normal exit
+ exit(0);
+ }
+ else
+ {
+ log::add("fork one worker fail", "Error");
+ exit;
+ }
+ }
+
+ /**
+  * Try to switch the user (and group) the current process runs as.
+  *
+  * Does nothing when no user name is given or when the process is not running as root.
+  * Failures are logged, not thrown.
+  *
+  * @param string $user_name name of the user to run as
+  * @return void
+  */
+ protected static function set_process_user($user_name)
+ {
+     // No user name given, or the current user is not root
+     if(empty($user_name) || posix_getuid() !== 0)
+     {
+         return;
+     }
+     $user_info = posix_getpwnam($user_name);
+     // posix_getpwnam() returns false when the user does not exist
+     if(!is_array($user_info))
+     {
+         log::add('Can not run woker as '.$user_name." , user does not exist", "Error");
+         return;
+     }
+     if($user_info['uid'] != posix_getuid() || $user_info['gid'] != posix_getgid())
+     {
+         // The group is switched before the user
+         if(!posix_setgid($user_info['gid']) || !posix_setuid($user_info['uid']))
+         {
+             log::add('Can not run woker as '.$user_name." , You shuld be root", "Error");
+         }
+     }
+ }
+
+ /**
+  * Set the name of the current process, as shown by tools such as `ps aux`.
+  * Requires PHP >= 5.5 or the proctitle extension; otherwise nothing happens.
+  *
+  * @param string $title process title; an empty value is ignored
+  * @return void
+  */
+ protected function set_process_title($title)
+ {
+     if (empty($title))
+     {
+         return;
+     }
+     // Provided by the proctitle extension
+     if (extension_loaded('proctitle') && function_exists('setproctitle'))
+     {
+         @setproctitle($title);
+         return;
+     }
+     // PHP >= 5.5
+     if (function_exists('cli_set_process_title'))
+     {
+         cli_set_process_title($title);
+     }
+ }
+
+ /**
+ * Monitor the exit events and exit statuses of all child processes.
+ *
+ * Loops on pcntl_wait(). When a child exits, its slot in the pid table is set to 0 and,
+ * unless run_once is set, a replacement worker is forked. Once every slot is 0, or when
+ * pcntl_wait() returns without a child pid, the on_stop callback is invoked and the process exits.
+ *
+ * @return void
+ */
+ public function monitor_workers()
+ {
+ // Mark the master process as running
+ self::$_status = "running";
+ while(1)
+ {
+ // Original note: with pcntl_signal_dispatch the child processes cannot receive signals
+ // If a signal has arrived, try to trigger the signal handler
+ //pcntl_signal_dispatch();
+ // Suspend the process until a child exits or a signal interrupts the wait
+ $status = 0;
+ $pid = pcntl_wait($status, WUNTRACED);
+ // If a signal has arrived, try to trigger the signal handler
+ //pcntl_signal_dispatch();
+
+ // A child process exited
+ if($pid > 0)
+ {
+ //echo "worker[".$pid."] stop\n";
+ //$this->stop();
+
+ // Not a normal exit, e.g. the worker was killed
+ if($status !== 0)
+ {
+ log::add("worker {$pid} exit with status $status", "Warning");
+ }
+
+ // Swap keys and values (pid => worker_id)
+ $worker_pids = array_flip(self::$_worker_pids);
+ // Look up the worker_id by pid
+ $worker_id = $worker_pids[$pid];
+ // The slot is kept (set to 0) rather than unset so the worker can be restarted
+ self::$_worker_pids[$worker_id] = 0;
+ //unset(self::$_worker_pids[$pid]);
+
+ // Fork a replacement worker
+ if (!$this->run_once)
+ {
+ $this->fork_one_worker($worker_id);
+ }
+
+ // If every child process has exited, trigger the master stop callback
+ $all_worker_stop = true;
+ foreach (self::$_worker_pids as $_worker_pid)
+ {
+ // As long as one worker still has a pid, the workers are not all stopped
+ if ($_worker_pid != 0)
+ {
+ $all_worker_stop = false;
+ }
+ }
+ if ($all_worker_stop)
+ {
+ if ($this->on_stop)
+ {
+ call_user_func($this->on_stop, $this);
+ }
+ exit(0);
+ }
+ }
+ // Anything else (pcntl_wait did not return a child pid)
+ else
+ {
+ // Original note: this branch is reached when the workers exit after receiving the master's signal
+ if ($this->on_stop)
+ {
+ call_user_func($this->on_stop, $this);
+ }
+ exit(0);
+ }
+ }
+ }
+
+ /**
+  * Run the shutdown sequence for all processes.
+  * Triggered by an event (signal), not by the program running to completion.
+  *
+  * In the master process SIGINT is forwarded to every live worker; in a worker process
+  * stop() is called and the process exits with status 0.
+  *
+  * @return void
+  */
+ public function stop_all()
+ {
+     // Mark the master / worker process as shutting down
+     self::$_status = "shutdown";
+     // Master process
+     if(self::$_master_pid === posix_getpid())
+     {
+         // Send the stop signal to every worker process
+         foreach (self::$_worker_pids as $worker_pid)
+         {
+             // Exited workers are kept in the table with pid 0, and kill(0, sig) would
+             // signal the whole process group (the master included), so skip them
+             if ($worker_pid > 0)
+             {
+                 posix_kill($worker_pid, SIGINT);
+             }
+         }
+     }
+     // Worker process
+     else
+     {
+         // Exit after receiving the stop signal from the master; the original author notes that
+         // business integrity should be considered here instead of exiting forcibly
+         $this->stop();
+         exit(0);
+     }
+ }
+
+ /**
+  * Stop the current worker instance.
+  * Called both when the worker finishes normally and when it exits because of a signal.
+  *
+  * @return void
+  */
+ public function stop()
+ {
+     $callback = $this->on_worker_stop;
+     if ($callback)
+     {
+         call_user_func($callback, $this);
+     }
+     // Mark the worker process as shut down
+     self::$_status = "shutdown";
+ }
+
+ /**
+  * Shutdown check, executed before PHP exits: log an error when the process ends
+  * without having reached the "shutdown" status.
+  *
+  * @return void
+  */
+ public function check_errors()
+ {
+     // The worker exited normally, nothing to report
+     if (self::$_status == "shutdown")
+     {
+         return;
+     }
+
+     $error_msg = "WORKER EXIT UNEXPECTED ";
+     $errors = error_get_last();
+     $fatal_types = array(E_ERROR, E_PARSE, E_CORE_ERROR, E_COMPILE_ERROR, E_RECOVERABLE_ERROR);
+     // Details are appended only for the fatal error types listed above
+     if ($errors && in_array($errors['type'], $fatal_types, true))
+     {
+         $error_msg .= $this->get_error_type($errors['type']) . " {$errors['message']} in {$errors['file']} on line {$errors['line']}";
+     }
+     log::add($error_msg, 'Error');
+ }
+
+ /**
+  * Map a PHP error level to the name of its constant.
+  *
+  * @param integer $type error level, e.g. the 'type' entry of error_get_last()
+  * @return string constant name, or an empty string for an unknown level
+  */
+ protected function get_error_type($type)
+ {
+     $names = array(
+         E_ERROR             => 'E_ERROR',             // 1
+         E_WARNING           => 'E_WARNING',           // 2
+         E_PARSE             => 'E_PARSE',             // 4
+         E_NOTICE            => 'E_NOTICE',            // 8
+         E_CORE_ERROR        => 'E_CORE_ERROR',        // 16
+         E_CORE_WARNING      => 'E_CORE_WARNING',      // 32
+         E_COMPILE_ERROR     => 'E_COMPILE_ERROR',     // 64
+         E_COMPILE_WARNING   => 'E_COMPILE_WARNING',   // 128
+         E_USER_ERROR        => 'E_USER_ERROR',        // 256
+         E_USER_WARNING      => 'E_USER_WARNING',      // 512
+         E_USER_NOTICE       => 'E_USER_NOTICE',       // 1024
+         E_STRICT            => 'E_STRICT',            // 2048
+         E_RECOVERABLE_ERROR => 'E_RECOVERABLE_ERROR', // 4096
+         E_DEPRECATED        => 'E_DEPRECATED',        // 8192
+         E_USER_DEPRECATED   => 'E_USER_DEPRECATED',   // 16384
+     );
+     return isset($names[$type]) ? $names[$type] : "";
+ }
+}
diff --git a/vendor/owner888/phpspider/gitadd.sh b/vendor/owner888/phpspider/gitadd.sh
new file mode 100644
index 0000000..577e558
--- /dev/null
+++ b/vendor/owner888/phpspider/gitadd.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Pull, then add, commit and push a single file or directory.
+# Usage: gitadd.sh <path> [commit message]
+if [ ! -d "$1" ] && [ ! -f "$1" ]; then
+    echo "file $1 not exists"
+    # Report the failure to the caller instead of exiting with status 0
+    exit 1
+fi
+filename=$1
+
+comment="add file"
+if [[ $2 != "" ]]; then
+    comment=$2
+fi
+
+echo "start update..."
+git pull
+echo "start add new file..."
+# Quote the path so names containing spaces or glob characters are handled
+git add -- "$filename"
+echo "start commit..."
+git commit -m "$comment" -- "$filename"
+git push -u origin master
+echo "git commit complete..."
diff --git a/vendor/owner888/phpspider/hacked-emails/banners.txt b/vendor/owner888/phpspider/hacked-emails/banners.txt
new file mode 100644
index 0000000..5248192
--- /dev/null
+++ b/vendor/owner888/phpspider/hacked-emails/banners.txt
@@ -0,0 +1,129 @@
+
+ _-o#&&*''''?d:>b\_
+ _o/"`'' '',, dMF9MMMMMHo_
+ .o' `"MbHMMMMMMMMMMMHo.
+ .o"" ' vodM*$&&HMMMMMMMMMM?.
+ ,' $M&ood,~'`(#MMMMMMH\
+ / ,MMMMMMM#b?#bobMMMMHMMML
+ & ?MMMMMMMMMMMMMMMMM7MMM$R*Hk
+ ?$. :MMMMMMMMMMMMMMMMMMM/HMMM|`*L
+| |MMMMMMMMMMMMMMMMMMMMbMH' T,
+$H#: `*MMMMMMMMMMMMMMMMMMMMb#]' `?
+]MMH# ""*""""*#MMMMMMMMMMMMM' -
+MMMMMb_ |MMMMMMMMMMMP' :
+HMMMMMMMHo `MMMMMMMMMT .
+?MMMMMMMMP 9MMMMMMMM] -
+-?MMMMMMM |MMMMMMMMM?,d- ' {Name}
+ :|MMMMMM- `MMMMMMMT .M|. : {Description}
+ .9MMM[ &MMMMM*' `' . {Loaded}
+ :9MMk `MMM#" -
+ &M] ` .-
+ `&. .
+ `~, . ./
+ . _ .-
+ '`--._,dd###pp=""'
+
+$$$$$AnyShIt$$$$$$
+
+ _v->#H#P? "':o<>\_
+ .,dP` `'' "'-o.+H6&MMMHo_
+ oHMH9' `?&bHMHMMMMMMHo.
+ oMP"' ' ooMP*#&HMMMMMMM?.
+ ,M* - `*MSdob//`^#MMMH\
+ d*' .,MMMMMMH#o>#ooMMMMMb
+ HM- :HMMMMMMMMMMMMMMM&HM[R\
+ d"Z\. 9MMMMMMMMMMMMMMMMM[HMM|:
+-H - MMMMMMMMMMMMMMMMMMMbMP' :
+:??Mb# `9MMMMMMMMMMMMMMMMMMH#! .
+: MMMMH#, "*""""`#HMMMMMMMMMMH -
+||MMMMMM6\. [MMMMMMMMMH' :
+:|MMMMMMMMMMHo `9MMMMMMMM' .
+. HMMMMMMMMMMP' !MMMMMMMM `
+- `#MMMMMMMMM HMMMMMMM*,/ :
+ : ?MMMMMMMF HMMMMMM',P' : {Name}
+ . HMMMMR' [MMMMP' ^' - {Description}
+ : `HMMMT iMMH' .' {Loaded}
+ -.`HMH .
+ -:*H . '
+ -`\,, . .-
+ ' . _ .-`
+ '`~\.__,obb#q==~'''
+
+$$$$$AnyShIt$$$$$$
+
+ _ood>H&H&Z?#M#b-\.
+ .\HMMMMMR?`\M6b."`' ''``v.
+ .. .MMMMMMMMMMHMMM#&. ``~o.
+ . ,HMMMMMMMMMMMM*"'-` &b.
+ . .MMMMMMMMMMMMH' `"&\
+ - RMMMMM#H##R' 4Mb
+ - |7MMM' ?:: `|MMb
+ / HMM__#|`"\>?v.. `MMML
+. `"'#Hd| ` 9MMM:
+- |\,\?HH#bbL `9MMb
+: !MMMMMMMH#b, `""T
+. . ,MMMMMMMMMMMbo. |
+: 4MMMMMMMMMMMMMMMHo |
+: ?MMMMMMMMMMMMMMM? :
+-. `#MMMMMMMMMMMM: .-
+ : |MMMMMMMMMM? .
+ - JMMMMMMMT' : {Name}
+ `. MMMMMMH' - {Description}
+ -. |MMM#*` - {Loaded}
+ . HMH' . '
+ -. #H:. .-
+ ` . .\ .-
+ '-..-+oodHL_,--/-`
+
+
+$$$$$AnyShIt$$$$$$
+
+ .,:,#&6dHHHb#o\_
+ .oHHMMMMMMMMMMMMMMMMMH*\,.
+ oHMMMMMMMMMMMMMMMMMMMMMMHb:'-.
+ .dMMMMMMMMMMMMMMMMMMMMMMMMMH|\/' .
+ ,&HMMMMMMMMMMMMMMMMMMMMMMM/"&.,d. -.
+ dboMMHMMMMMMMMMMMMMMMMMMMMMML `' .
+ HMHMMM$Z***MMMMMMMMMMMMMMMMMM|.- .
+ dMM]MMMM#' `9MMMH?"`MMMMR'T' _ :
+|MMMbM#'' |MM" ``MMMH. <_ .
+dMMMM#& *&. .?`*" .'&: .
+MMMMMH- `' -v/H .dD "' ' :
+MMMM* `*M: 4MM*::-!v,_ :
+MMMM `*?::" "'``"?9Mb::. :
+&MMM, `"'"'|"._ "?`| - :
+`MMM].H ,#dM[_H ..:
+ 9MMi`M: . .ooHMMMMMMM, ..
+ 9Mb `- 1MMMMMMMMMM| : {Name}
+ ?M |MM#*#MMMM* . {Description}
+ -. ` |#"' ,' {Loaded}
+ . -" v`
+ -. .-
+ - . . `
+ '-*#d#HHMMMMHH#"-'
+
+$$$$$AnyShIt$$$$$$
+
+ .-:?,Z?:&$dHH##b\_
+ ,:bqRMMMMMMMMMMMMMMMMMHo.
+ .?HHHMMMMMMMMMMMMMMMMMMMMMMMHo.
+ -o/*M9MMMMMMMMMMMMMMMMMMMMMMMMMMMv
+ .:H\b\'|?#HHMMMMMMMMMMMMMMMMMMMMMM6?Z\
+ .?MMMHbdbbodMMMMHMMMMMMMMMMMMMMMMMMMM\':
+ :MMMMMMMMMMM7MMMMb?6P**#MMMMMMMMMMMMMMM_ :
+ \MMMMMMMMMMMMb^MMMMMM? `*MMMM*"`MMMR<' . -
+.1MMMMMMMMMMMMMb]M#"" 9MR' `?MMb \. :
+-MMMMMMMMMMMMMMMH##|` *&. |`*' .\ .
+-?""*MMMMMMMMMMMMM' ' |?b ,]" :
+: MMMMMMMMMMH' `M_|M]r\?
+. `MMMMMMMMM' `$_:`'"H
+- TMMMMMMMM, '"``::
+: [MMMMMMMM| oH| .#M-
+ : `9MMMMMM' .MP . ,oMMT
+ . HMMMMP' `' ,MMMP {Name}
+ - `MMH' HH9* {Description}
+ '. ` ` .' {Loaded}
+ - . '
+ ` . - .-
+ ` . .-
+ ' -==pHMMH##HH#"""
diff --git a/vendor/owner888/phpspider/hacked-emails/hacked_emails.php b/vendor/owner888/phpspider/hacked-emails/hacked_emails.php
new file mode 100644
index 0000000..11771d7
--- /dev/null
+++ b/vendor/owner888/phpspider/hacked-emails/hacked_emails.php
@@ -0,0 +1,49 @@
+
+ * @copyright seatle
+ * @link http://www.epooll.com/
+ * @license http://www.opensource.org/licenses/mit-license.php MIT License
+ */
+
+class cls_curl
+{
+ protected static $timeout = 10;
+ protected static $ch = null;
+ protected static $useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36';
+ protected static $http_raw = false;
+ protected static $cookie = null;
+ protected static $cookie_jar = null;
+ protected static $cookie_file = null;
+ protected static $referer = null;
+ protected static $ip = null;
+ protected static $proxy = null;
+ protected static $headers = array();
+ protected static $hosts = array();
+ protected static $gzip = false;
+ protected static $info = array();
+
+ /**
+ * Set the timeout in seconds.
+ *
+ * init() uses it as the connect timeout and uses $timeout + 5 as the overall transfer timeout.
+ *
+ * @param int $timeout
+ * @return void
+ */
+ public static function set_timeout($timeout)
+ {
+ self::$timeout = $timeout;
+ }
+
+ /**
+ * Set the proxy (passed to CURLOPT_PROXY by http_request()).
+ *
+ * @param mixed $proxy
+ * @return void
+ * @author seatle
+ * @created time :2016-09-18 10:17
+ */
+ public static function set_proxy($proxy)
+ {
+ self::$proxy = $proxy;
+ }
+
+ /**
+ * Set the referer (passed to CURLOPT_REFERER by http_request()).
+ *
+ * @param string $referer
+ * @return void
+ */
+ public static function set_referer($referer)
+ {
+ self::$referer = $referer;
+ }
+
+ /**
+ * Set the user agent (passed to CURLOPT_USERAGENT).
+ *
+ * @param string $useragent
+ * @return void
+ */
+ public static function set_useragent($useragent)
+ {
+ self::$useragent = $useragent;
+ }
+
+ /**
+ * Set the cookie string (passed to CURLOPT_COOKIE by http_request()).
+ *
+ * @param string $cookie
+ * @return void
+ */
+ public static function set_cookie($cookie)
+ {
+ self::$cookie = $cookie;
+ }
+
+ /**
+ * Set the cookie jar (passed to CURLOPT_COOKIEJAR by http_request()).
+ *
+ * @param string $cookie_jar
+ * @return void
+ */
+ public static function set_cookie_jar($cookie_jar)
+ {
+ self::$cookie_jar = $cookie_jar;
+ }
+
+ /**
+ * Set the cookie file (passed to CURLOPT_COOKIEFILE by http_request()).
+ *
+ * @param string $cookie_file
+ * @return void
+ */
+ public static function set_cookie_file($cookie_file)
+ {
+ self::$cookie_file = $cookie_file;
+ }
+
+ /**
+ * Choose whether the response headers are returned together with the body
+ * (a truthy value makes http_request() enable CURLOPT_HEADER).
+ *
+ * @param mixed $http_raw
+ * @return void
+ * @author seatle
+ * @created time :2016-09-18 10:17
+ */
+ public static function set_http_raw($http_raw)
+ {
+ self::$http_raw = $http_raw;
+ }
+
+ /**
+ * Set the IP that http_request() sends in the CLIENT-IP and X-FORWARDED-FOR request headers.
+ *
+ * @param string $ip
+ * @return void
+ */
+ public static function set_ip($ip)
+ {
+ self::$ip = $ip;
+ }
+
+ /**
+ * Set the request headers (passed to CURLOPT_HTTPHEADER by http_request()).
+ *
+ * @param array $headers list of header lines; http_request() merges it with array_merge()
+ * @return void
+ */
+ public static function set_headers($headers)
+ {
+ self::$headers = $headers;
+ }
+
+ /**
+ * Set the hosts used for host binding: http_request() picks one entry at random and
+ * substitutes it for the host in the URL, sending the original host in a Host header.
+ *
+ * @param array $hosts list indexed from 0 (http_request() indexes it with rand(0, count - 1))
+ * @return void
+ */
+ public static function set_hosts($hosts)
+ {
+ self::$hosts = $hosts;
+ }
+
+ /**
+ * Enable or disable gzip (a truthy value makes http_request() set CURLOPT_ENCODING to 'gzip').
+ *
+ * @param bool $gzip
+ * @return void
+ */
+ public static function set_gzip($gzip)
+ {
+ self::$gzip = $gzip;
+ }
+
+ /**
+  * Initialise the shared cURL handle.
+  *
+  * A new handle is created unless self::$ch currently holds a resource.
+  *
+  * @return mixed the cURL handle
+  */
+ public static function init()
+ {
+     //if (empty ( self::$ch ))
+     if (is_resource ( self::$ch ))
+     {
+         return self::$ch;
+     }
+
+     self::$ch = curl_init ();
+     curl_setopt( self::$ch, CURLOPT_RETURNTRANSFER, true );
+     curl_setopt( self::$ch, CURLOPT_CONNECTTIMEOUT, self::$timeout );
+     curl_setopt( self::$ch, CURLOPT_HEADER, false );
+     curl_setopt( self::$ch, CURLOPT_USERAGENT, self::$useragent );
+     curl_setopt( self::$ch, CURLOPT_TIMEOUT, self::$timeout + 5);
+     // Original note: when timeouts are used in a multi-threaded scenario the signal handlers
+     // are skipped, although there is still a small chance of a crash
+     curl_setopt( self::$ch, CURLOPT_NOSIGNAL, true);
+
+     return self::$ch;
+ }
+
+ /**
+ * Send a GET request.
+ *
+ * @param string $url
+ * @param array $fields query parameters; http_request() appends them to $url with http_build_query()
+ * @return mixed the result of http_request()
+ */
+ public static function get($url, $fields = array())
+ {
+ self::init ();
+ return self::http_request($url, 'get', $fields);
+ }
+
+ /**
+ * Send a POST request.
+ *
+ * $fields may take three forms: 1) an array; 2) an http query string; 3) JSON
+ * 1) array('name'=>'yangzetao') 2) http_build_query(array('name'=>'yangzetao')) 3) json_encode(array('name'=>'yangzetao'))
+ * The first two are ordinary posts that the receiving script can read from $_POST.
+ * The third is a post stream (JSON-RPC, i.e. a web service): although it is a POST, the receiver can only read it as a stream, via php://input or $HTTP_RAW_POST_DATA.
+ *
+ * @param mixed $url
+ * @param array|string $fields
+ * @static
+ * @access public
+ * @return mixed the result of http_request()
+ */
+ public static function post($url, $fields = array())
+ {
+ self::init ();
+ return self::http_request($url, 'post', $fields);
+ }
+
+ /**
+  * Execute a GET or POST request on the shared cURL handle and close the handle afterwards.
+  *
+  * The Host / CLIENT-IP / X-FORWARDED-FOR headers are built per request and are no longer
+  * written back into the static header list, so they cannot leak into later requests.
+  *
+  * @param string       $url
+  * @param string       $type   'get' or 'post' (case-insensitive)
+  * @param array|string $fields GET: appended to the URL as a query string; POST: sent as CURLOPT_POSTFIELDS
+  * @return mixed the value returned by curl_exec() (false on failure)
+  */
+ public static function http_request($url, $type = 'get', $fields = array())
+ {
+     // Make sure a usable handle exists even when this method is called directly
+     self::init();
+     $method = strtolower($type);
+
+     // For GET, simply build the final URL
+     if ($method == 'get' && !empty($fields))
+     {
+         $url = $url . (strpos($url,"?")===false ? "?" : "&") . http_build_query($fields);
+     }
+
+     // Headers for this request only
+     $headers = self::$headers;
+
+     // Bind a random host, as a simple form of load balancing
+     if (self::$hosts)
+     {
+         $host = parse_url($url, PHP_URL_HOST);
+         if (!empty($host))
+         {
+             $hosts = array_values((array) self::$hosts);
+             $ip = $hosts[rand(0, count($hosts)-1)];
+             // Replace only the host part, not every occurrence of the same text in the path or query
+             $scheme_end = strpos($url, '//');
+             $pos = strpos($url, $host, $scheme_end === false ? 0 : $scheme_end + 2);
+             if ($pos !== false)
+             {
+                 $url = substr_replace($url, $ip, $pos, strlen($host));
+                 $headers = array_merge( array('Host:'.$host), $headers );
+             }
+         }
+     }
+     curl_setopt( self::$ch, CURLOPT_URL, $url );
+     // POST request
+     if ($method == 'post')
+     {
+         curl_setopt( self::$ch, CURLOPT_POST, true );
+         curl_setopt( self::$ch, CURLOPT_POSTFIELDS, $fields );
+     }
+     if (self::$useragent)
+     {
+         curl_setopt( self::$ch, CURLOPT_USERAGENT, self::$useragent );
+     }
+     if (self::$cookie)
+     {
+         curl_setopt( self::$ch, CURLOPT_COOKIE, self::$cookie );
+     }
+     if (self::$cookie_jar)
+     {
+         curl_setopt( self::$ch, CURLOPT_COOKIEJAR, self::$cookie_jar );
+     }
+     if (self::$cookie_file)
+     {
+         curl_setopt( self::$ch, CURLOPT_COOKIEFILE, self::$cookie_file );
+     }
+     if (self::$referer)
+     {
+         curl_setopt( self::$ch, CURLOPT_REFERER, self::$referer );
+     }
+     if (self::$ip)
+     {
+         $headers = array_merge( array('CLIENT-IP:'.self::$ip, 'X-FORWARDED-FOR:'.self::$ip), $headers );
+     }
+     if ($headers)
+     {
+         curl_setopt( self::$ch, CURLOPT_HTTPHEADER, $headers );
+     }
+     if (self::$gzip)
+     {
+         curl_setopt( self::$ch, CURLOPT_ENCODING, 'gzip' );
+     }
+     if (self::$proxy)
+     {
+         curl_setopt( self::$ch, CURLOPT_PROXY, self::$proxy );
+     }
+     if (self::$http_raw)
+     {
+         curl_setopt( self::$ch, CURLOPT_HEADER, true );
+     }
+
+     $data = curl_exec ( self::$ch );
+     self::$info = curl_getinfo(self::$ch);
+
+     // Close the handle
+     curl_close( self::$ch );
+     return $data;
+ }
+
+ /**
+ * Return the curl_getinfo() data stored by the most recent http_request() call.
+ *
+ * @return mixed an empty array before the first request
+ */
+ public static function get_info()
+ {
+ return self::$info;
+ }
+
+ /**
+  * Return the HTTP status code of the most recent request.
+  *
+  * @return int the status code, or 0 when no request information is available
+  */
+ public static function get_http_code()
+ {
+     // self::$info is empty before the first request and may be false if curl_getinfo() failed
+     return isset(self::$info['http_code']) ? self::$info['http_code'] : 0;
+ }
+}
+
+/**
+ * Fetch all URLs in parallel with curl_multi, wait until every transfer has finished,
+ * then hand each response to callback().
+ *
+ * @param array $urls  URLs to fetch; duplicates are fetched once
+ * @param mixed $delay passed through to callback()
+ * @return array map of URL => return value of callback()
+ */
+function classic_curl($urls, $delay)
+{
+    $queue = curl_multi_init();
+    $map = array();
+
+    foreach ($urls as $url)
+    {
+        // The map is keyed by URL, so a duplicate would orphan the earlier handle
+        if (isset($map[$url]))
+        {
+            continue;
+        }
+
+        // Create the cURL handle
+        $ch = curl_init();
+
+        // Set the URL and the other options
+        curl_setopt($ch, CURLOPT_URL, $url);
+        curl_setopt($ch, CURLOPT_TIMEOUT, 1);
+        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+        curl_setopt($ch, CURLOPT_HEADER, 0);
+        curl_setopt($ch, CURLOPT_NOSIGNAL, true);
+
+        // Add the handle to the curl_multi queue
+        curl_multi_add_handle($queue, $ch);
+        $map[$url] = $ch;
+    }
+
+    $active = null;
+
+    // Start the transfers
+    do {
+        $mrc = curl_multi_exec($queue, $active);
+    } while ($mrc == CURLM_CALL_MULTI_PERFORM);
+
+    while ($active > 0 && $mrc == CURLM_OK) {
+        // curl_multi_select() can return -1 straight away; pause briefly instead of
+        // spinning at 100% CPU, then keep driving the transfers
+        if (curl_multi_select($queue, 0.5) == -1)
+        {
+            usleep(1000);
+        }
+        do {
+            $mrc = curl_multi_exec($queue, $active);
+        } while ($mrc == CURLM_CALL_MULTI_PERFORM);
+    }
+
+    $responses = array();
+    foreach ($map as $url=>$ch) {
+        $responses[$url] = callback(curl_multi_getcontent($ch), $delay, $url);
+        curl_multi_remove_handle($queue, $ch);
+        curl_close($ch);
+    }
+
+    curl_multi_close($queue);
+    return $responses;
+}
+
+/**
+ * Fetch all URLs in parallel with curl_multi, handing each response to callback()
+ * as soon as its transfer completes.
+ *
+ * @param array $urls  URLs to fetch
+ * @param mixed $delay passed through to callback()
+ * @return array map of URL => array('info' => ..., 'error' => ..., 'results' => ...)
+ */
+function rolling_curl($urls, $delay)
+{
+    $queue = curl_multi_init();
+    $map = array();
+
+    // Handles are objects on PHP 8+ (not castable to string) and resources before that
+    $handle_key = function ($handle) {
+        return is_object($handle) ? spl_object_id($handle) : (int) $handle;
+    };
+
+    foreach ($urls as $url) {
+        $ch = curl_init();
+
+        curl_setopt($ch, CURLOPT_URL, $url);
+        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
+        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+        curl_setopt($ch, CURLOPT_HEADER, 0);
+        curl_setopt($ch, CURLOPT_NOSIGNAL, true);
+        // NOTE(review): hard-coded session cookie committed to source; it should come from configuration
+        $cookie = '_za=36643642-e546-4d60-a771-8af8dcfbd001; q_c1=a57a2b9f10964f909b8d8969febf3ab2|1437705596000|1437705596000; _xsrf=f0304fba4e44e1d008ec308d59bab029; cap_id="YWY1YmRmODlmZGVmNDc3MWJlZGFkZDg3M2E0M2Q5YjM=|1437705596|963518c454bb6f10d96775021c098c84e1e46f5a"; z_c0="QUFCQVgtRWZBQUFYQUFBQVlRSlZUVjR6NEZVUTgtRkdjTVc5UDMwZXRJZFdWZ2JaOWctNVhnPT0=|1438164574|aed6ef3707f246a7b64da4f1e8c089395d77ff2b"; __utma=51854390.1105113342.1437990174.1438160686.1438164116.10; __utmc=51854390; __utmz=51854390.1438134939.8.5.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/people/yangzetao; __utmv=51854390.100-1|2=registration_date=20131030=1^3=entry_date=20131030=1';
+        curl_setopt($ch, CURLOPT_COOKIE, $cookie);
+        $useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36';
+        curl_setopt( $ch, CURLOPT_USERAGENT, $useragent );
+        curl_setopt($ch, CURLOPT_ENCODING, 'gzip');
+
+        curl_multi_add_handle($queue, $ch);
+        $map[$handle_key($ch)] = $url;
+    }
+
+    $responses = array();
+    do {
+        while (($code = curl_multi_exec($queue, $active)) == CURLM_CALL_MULTI_PERFORM) ;
+
+        if ($code != CURLM_OK) { break; }
+
+        // A request was just completed -- find out which one
+        while ($done = curl_multi_info_read($queue)) {
+            $finished_url = $map[$handle_key($done['handle'])];
+
+            // Get the info and content returned on the request
+            $info = curl_getinfo($done['handle']);
+            $error = curl_error($done['handle']);
+            $results = callback(curl_multi_getcontent($done['handle']), $delay, $finished_url);
+            $responses[$finished_url] = compact('info', 'error', 'results');
+
+            // Remove the curl handle that just completed
+            curl_multi_remove_handle($queue, $done['handle']);
+            curl_close($done['handle']);
+        }
+
+        // Block for data in / output; error handling is done by curl_multi_exec
+        if ($active > 0) {
+            // curl_multi_select() can return -1 straight away; pause briefly instead of spinning
+            if (curl_multi_select($queue, 0.5) == -1) {
+                usleep(1000);
+            }
+        }
+
+    } while ($active);
+
+    curl_multi_close($queue);
+    return $responses;
+}
+
+/**
+ * Store a fetched page as ./html2/<md5 of the URL>.html; empty responses are skipped.
+ *
+ * @param string $data  response body
+ * @param mixed  $delay unused
+ * @param string $url   URL the response belongs to
+ * @return void
+ */
+function callback($data, $delay, $url) {
+    if (empty($data))
+    {
+        return;
+    }
+    file_put_contents("./html2/".md5($url).".html", $data);
+}
+
diff --git a/vendor/owner888/phpspider/library/cls_query.php b/vendor/owner888/phpspider/library/cls_query.php
new file mode 100644
index 0000000..65dd9fe
--- /dev/null
+++ b/vendor/owner888/phpspider/library/cls_query.php
@@ -0,0 +1,248 @@
+
+ * @created time :2015-08-08 15:52
+ */
+ // Split a selector string such as "ul#nav li a.link[href='/x']" into a list of node descriptors.
+ // Each descriptor is an array with the keys path, name, id, class and other (attribute key/val).
+ private static function get_nodes($query)
+ {
+ // Collapse runs of whitespace into a single space
+ // Remove the whitespace around the > + ~ symbols; the original note says these symbols are not used, so this step could be skipped
+ // ul>li.className
+ $query = trim(
+ preg_replace('@\s+@', ' ',
+ preg_replace('@\s*(>|\\+|~)\s*@', '\\1', $query)
+ )
+ );
+
+ $nodes = array();
+ if (! $query)
+ {
+ return $nodes;
+ }
+
+ $query_arr = explode(" ", $query);
+ foreach ($query_arr as $k=>$v)
+ {
+ // path is the selector accumulated up to and including this segment
+ $path = $k == 0 ? $v : $path.' '.$v;
+ $node = array("path"=>(string)$path, "name"=>"", "id"=>"", "class"=>"", "other"=>array());
+ // If an attribute selector is present
+ if (preg_match('@(.*?)\[(.*?)=[\'|"](.*?)[\'|"]\]@', $v, $matches) && !empty($matches[2]) && !empty($matches[3]))
+ {
+ // Strip the attribute selector, e.g. [rel='topic']
+ $v = $matches[1];
+ $node['other'] = array(
+ 'key'=>$matches[2],
+ 'val'=>$matches[3],
+ );
+ }
+
+ // If an id is present (exactly one "#")
+ $id_arr = explode("#", $v);
+ $class_arr = explode(".", $v);
+ if (count($id_arr) === 2)
+ {
+ $node['name'] = $id_arr[0];
+ $node['id'] = $id_arr[1];
+ }
+ // If a class is present (exactly one ".")
+ elseif (count($class_arr) === 2)
+ {
+ $node['name'] = $class_arr[0];
+ $node['class'] = $class_arr[1];
+ }
+ // Neither id nor class
+ else
+ {
+ $node['name'] = $v;
+ }
+ $nodes[] = $node;
+ }
+ //print_r($nodes);
+ //exit;
+ return $nodes;
+ }
+
+ /**
+ * Walk the node descriptors from left to right, matching each one inside the results of the previous one.
+ *
+ * @param array $nodes node descriptors as produced by get_nodes()
+ * @param string $attr "html" for inner content, otherwise the attribute name to read from the last node
+ * @return mixed false when no content is loaded; otherwise the list collected for the last processed node, or null when nothing was collected
+ */
+ public static function get_datas($nodes, $attr = "html")
+ {
+ if (empty(self::$content))
+ {
+ return false;
+ }
+
+ $node_datas = array();
+ $count = count($nodes);
+ // Loop over all nodes
+ foreach ($nodes as $i=>$node)
+ {
+ $is_last = $count == $i+1 ? true : false;
+ // First node: match against the whole content
+ if ($i == 0)
+ {
+ $datas = array();
+ $datas = self::get_node_datas($node, self::$content, $attr, $is_last);
+ // If even the first node yields no data, leave the loop
+ if(!$datas)
+ {
+ break;
+ }
+ $node_datas[$nodes[$i]['path']] = $datas;
+ }
+ else
+ {
+ $datas = array();
+ // Loop over the results of the previous node
+ foreach ($node_datas[$nodes[$i-1]['path']] as $v)
+ {
+ $datas = array_merge( $datas, self::get_node_datas($node, trim($v), $attr, $is_last) );
+ }
+ $node_datas[$nodes[$i]['path']] = $datas;
+ // Drop the previous node's results to limit memory use (original note: or cache them locally for reuse?)
+ unset($node_datas[$nodes[$i-1]['path']]);
+ }
+ }
+ //print_r($datas);exit;
+ // Pop the last element off the array
+ $node_datas = array_pop($node_datas);
+ //print_r($node_datas);
+ //exit;
+ return $node_datas;
+ }
+
+ /**
+ * Extract the content matched by one node descriptor from a piece of HTML.
+ * $regex = '@]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i';
+ *
+ * NOTE(review): the patterns built below (and the sample above) look as if markup was stripped
+ * from them, e.g. the closing-tag part before the trailing '>' — confirm against the upstream source.
+ *
+ * @param mixed $node node descriptor with the keys name, id, class and other
+ * @param mixed $content HTML to search
+ * @param string $attr "html" for the captured content, otherwise an attribute name
+ * @param bool $is_last whether this is the last node of the selector; attributes are only read then
+ * @return array list of trimmed matches
+ * @author seatle
+ * @created time :2015-08-08 15:52
+ */
+ private static function get_node_datas($node, $content, $attr = "html", $is_last = false)
+ {
+ $node_datas = $datas = array();
+
+ if (!empty($node['id']))
+ {
+ if ($node['name'])
+ $regex = '@<'.$node['name'].'[^>]+id\\s*=\\s*["|\']+?'.$node['id'].'\\s*[^>]+?>(.*?)'.$node['name'].'>@is';
+ else
+ $regex = '@id\\s*=\\s*["|\']+?'.$node['id'].'\\s*[^>]+?>(.*?)<@is';
+ }
+ elseif (!empty($node['class']))
+ {
+ if ($node['name'])
+ $regex = '@<'.$node['name'].'[^>]+class\\s*=\\s*["|\']+?'.$node['class'].'\\s*[^>]+?>(.*?)'.$node['name'].'>@is';
+ else
+ $regex = '@class\\s*=\\s*["|\']+?'.$node['class'].'\\s*[^>]+?>(.*?)<@is';
+ }
+ else
+ {
+ // Why "*" (zero or more) is used here: the original comment is truncated at this point
+ $regex = '@<'.$node['name'].'[^>]*?>(.*?)'.$node['name'].'>@is';
+ }
+ self::log("regex --- " . $regex);;
+ preg_match_all($regex, $content, $matches);
+ $all_datas = empty($matches[0]) ? array() : $matches[0];
+ $html_datas = empty($matches[1]) ? array() : $matches[1];
+
+ // Filter out the matches that do not satisfy the attribute selector
+ foreach ($all_datas as $i=>$data)
+ {
+ // If an attribute selector is set, verify it
+ if (!empty($node['other']))
+ {
+ $regex = '@'.$node['other']['key'].'=[\'|"]'.$node['other']['val'].'[\'|"]@is';
+ self::log("regex other --- " . $regex);
+ // Skip the matches that do not satisfy the selector
+ if (!preg_match($regex, $data, $matches))
+ {
+ continue;
+ }
+ }
+ // Read the value of the requested attribute (last node only)
+ if ($attr != "html" && $is_last)
+ {
+ $regex = '@'.$attr.'=[\'|"](.*?)[\'|"]@is';
+ preg_match($regex, $data, $matches);
+ $node_datas[] = empty($matches[1]) ? '' : trim($matches[1]);
+ }
+ // Otherwise take the captured content of the node
+ else
+ {
+ $node_datas[] = trim($html_datas[$i]);
+ }
+ }
+ //echo " 11111 ========================================= \n";
+ //print_r($node_datas);
+ //echo " 22222 ========================================= \n\n\n";
+ return $node_datas;
+ }
+
+ /**
+  * Write a timestamped log line to standard output when debugging is enabled.
+  *
+  * @param string $msg
+  * @return void
+  */
+ private static function log($msg)
+ {
+     $line = sprintf("[%s] %s\n", date("Y-m-d H:i:s"), $msg);
+     if (self::$debug)
+     {
+         echo $line;
+     }
+ }
+
+}
+
+//$xpath = "ul.top-nav-dropdown li";
+//$xpath = "i.zg-icon";
+//print_r($nodes);
+//exit;
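+// [^>]+ matches one or more characters that are not ">"; a trailing ? makes the match non-greedy
+// \s matches a whitespace character
+// * means zero or more times
+// + means one or more times
+//
+// A back-reference refers to the content of a capturing group, numbered by its opening parenthesis counted from left to right.
+// \\0 refers to the whole match
+// \\1 refers to the 1st group
+// \\2 refers to the 2nd group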
+// $regex = '@]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i';
+//preg_match_all($regex, $content, $matches);
+//print_r($matches);
+//exit;
+
+// 用法
+//$content = file_get_contents("./test.html");
+//$query = "ul#top-nav-profile-dropdown li a";
+//$query = "div#zh-profile-following-topic a.link[href='/topic/19550937']";
+//cls_query::init($content);
+//$list = cls_query::query($query, "href");
+//print_r($list);
+
diff --git a/vendor/owner888/phpspider/library/cls_redis.php b/vendor/owner888/phpspider/library/cls_redis.php
new file mode 100644
index 0000000..88dc0b8
--- /dev/null
+++ b/vendor/owner888/phpspider/library/cls_redis.php
@@ -0,0 +1,1263 @@
+ <?php
+// +----------------------------------------------------------------------
+
+//----------------------------------
+// PHPSpider Redis操作类文件
+//----------------------------------
+
+ /**
+  * PHPSpider Redis helper: a static facade over the phpredis extension.
+  *
+  * Connections are kept per link name (see set_connect()). Every command wrapper goes
+  * through _exec(), which connects lazily, logs Redis exceptions through log::warn(),
+  * and reconnects and retries a bounded number of times when the exception code is 0.
+  *
+  * Unless stated otherwise a wrapper returns whatever the phpredis method returns, or
+  * NULL when no connection could be obtained or the command failed for good.
+  *
+  * @author seatle
+  */
+ class cls_redis
+ {
+     /**
+      * Legacy connection slot. Not used by this class; kept for subclasses.
+      */
+     protected static $redis = NULL;
+
+     /**
+      * Connection configs keyed by link name.
+      */
+     protected static $configs = array();
+
+     /**
+      * Open Redis connections keyed by link name.
+      */
+     private static $links = array();
+
+     /**
+      * Name of the link the next command will use.
+      */
+     private static $link_name = 'default';
+
+     /**
+      * Default key prefix, used when the config has no 'prefix' entry.
+      */
+     public static $prefix = "phpspider";
+
+     /**
+      * Last connection/authentication error message set by init().
+      */
+     public static $error = "";
+
+     /**
+      * How many times a command is retried after a Redis exception with code 0.
+      */
+     public static $max_retries = 3;
+
+     /**
+      * Return the connection for the current link, opening it on first use.
+      *
+      * @return Redis|false The connection, or false (with self::$error set) when the
+      *                     extension is missing, the connect fails or auth fails
+      */
+     public static function init()
+     {
+         if (!extension_loaded("redis"))
+         {
+             self::$error = "The redis extension was not found";
+             return false;
+         }
+
+         if (empty(self::$links[self::$link_name]))
+         {
+             $config = self::$link_name == 'default' ? self::_get_default_config() : self::$configs[self::$link_name];
+
+             // Optional keys fall back to defaults instead of raising undefined-index notices.
+             $port    = isset($config['port']) ? $config['port'] : 6379;
+             $timeout = isset($config['timeout']) ? $config['timeout'] : 0;
+             $db      = isset($config['db']) ? $config['db'] : 0;
+
+             // Only cache the link once it is fully set up, so a connect() that throws
+             // does not leave an unusable object behind.
+             $link = new Redis();
+             if (!$link->connect($config['host'], $port, $timeout))
+             {
+                 self::$error = "Unable to connect to redis server\nPlease check the configuration file config/inc_config.php";
+                 unset(self::$links[self::$link_name]);
+                 return false;
+             }
+
+             if (!empty($config['pass']) && !$link->auth($config['pass']))
+             {
+                 self::$error = "Redis Server authentication failed\nPlease check the configuration file config/inc_config.php";
+                 unset(self::$links[self::$link_name]);
+                 return false;
+             }
+
+             $prefix = empty($config['prefix']) ? self::$prefix : $config['prefix'];
+             $link->setOption(Redis::OPT_PREFIX, $prefix . ":");
+             $link->setOption(Redis::OPT_READ_TIMEOUT, -1);
+             $link->select($db);
+
+             self::$links[self::$link_name] = $link;
+         }
+
+         return self::$links[self::$link_name];
+     }
+
+     /**
+      * Close and forget every open connection.
+      *
+      * @return void
+      */
+     public static function clear_link()
+     {
+         foreach (self::$links as $name => $link)
+         {
+             // A slot may hold null after a failed command; only close real links.
+             if ($link)
+             {
+                 $link->close();
+             }
+             unset(self::$links[$name]);
+         }
+     }
+
+     /**
+      * Select the link used by subsequent commands, optionally registering its config.
+      *
+      * @param string $link_name Link name
+      * @param array  $config    Config for the link; may be omitted when one was registered earlier
+      * @return void
+      * @throws Exception When no config is given and none is registered for $link_name
+      */
+     public static function set_connect($link_name, $config = array())
+     {
+         self::$link_name = $link_name;
+         if (!empty($config))
+         {
+             self::$configs[self::$link_name] = $config;
+         }
+         else if (empty(self::$configs[self::$link_name]))
+         {
+             throw new Exception("You not set a config array for connect!");
+         }
+     }
+
+     /**
+      * Switch back to the default link.
+      *
+      * @return void
+      */
+     public static function set_connect_default()
+     {
+         $config = self::_get_default_config();
+         self::set_connect('default', $config);
+     }
+
+     /**
+      * Return the default config, loading it from $GLOBALS['config']['redis'] on first use.
+      * Terminates the script when that global is not an array.
+      *
+      * @return array
+      */
+     protected static function _get_default_config()
+     {
+         if (empty(self::$configs['default']))
+         {
+             if (!isset($GLOBALS['config']['redis']) || !is_array($GLOBALS['config']['redis']))
+             {
+                 exit('cls_redis.php _get_default_config()' . '没有redis配置');
+             }
+             self::$configs['default'] = $GLOBALS['config']['redis'];
+         }
+         return self::$configs['default'];
+     }
+
+     /**
+      * Run one phpredis method on the current link with reconnect-and-retry handling.
+      *
+      * A Redis exception is logged through log::warn(). When its code is 0 the link is
+      * closed, dropped and the command is retried after 100 ms, at most
+      * self::$max_retries times; any other code gives up immediately.
+      *
+      * @param string $method  phpredis method name
+      * @param array  $args    Positional arguments for the method
+      * @param mixed  $default Value returned when there is no connection or the command failed
+      * @return mixed
+      */
+     private static function _exec($method, $args = array(), $default = NULL)
+     {
+         $attempts = 1 + max(0, (int) self::$max_retries);
+         for ($attempt = 0; $attempt < $attempts; $attempt++)
+         {
+             self::init();
+             if (empty(self::$links[self::$link_name]))
+             {
+                 return $default;
+             }
+
+             try
+             {
+                 return call_user_func_array(array(self::$links[self::$link_name], $method), $args);
+             }
+             catch (Exception $e)
+             {
+                 $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n";
+                 log::warn($msg);
+                 if ($e->getCode() != 0)
+                 {
+                     return $default;
+                 }
+                 // Drop the link so the next init() reconnects.
+                 self::$links[self::$link_name]->close();
+                 self::$links[self::$link_name] = null;
+                 usleep(100000);
+             }
+         }
+         return $default;
+     }
+
+     /**
+      * Store a value, with an optional time to live.
+      *
+      * @param mixed $key    Key
+      * @param mixed $value  Value
+      * @param int   $expire Time to live in seconds; 0 or less means no expiry
+      * @return mixed
+      */
+     public static function set($key, $value, $expire = 0)
+     {
+         if ($expire > 0)
+         {
+             return self::_exec('setex', array($key, $expire, $value));
+         }
+         return self::_exec('set', array($key, $value));
+     }
+
+     /**
+      * Store a value only when the key does not exist yet, with an optional time to live.
+      *
+      * @param mixed $key    Key
+      * @param mixed $value  Value
+      * @param int   $expire Time to live in seconds; 0 or less means no expiry
+      * @return mixed
+      */
+     public static function setnx($key, $value, $expire = 0)
+     {
+         if ($expire > 0)
+         {
+             // SET with NX and EX sets value and expiry in a single command.
+             return self::_exec('set', array($key, $value, array('nx', 'ex' => $expire)));
+         }
+         return self::_exec('setnx', array($key, $value));
+     }
+
+     /**
+      * Acquire the lock "Lock:{$name}", blocking until it is obtained.
+      *
+      * @param mixed $name     Lock name; a null-like name is rejected
+      * @param mixed $value    Value stored under the lock key
+      * @param int   $expire   Maximum lifetime of the lock in seconds, after which Redis drops it
+      * @param int   $interval Microseconds to sleep between attempts
+      * @return bool true once the lock is held; false for a null-like name, when there is
+      *              no connection, or when the command failed for good
+      */
+     public static function lock($name, $value = 1, $expire = 5, $interval = 100000)
+     {
+         if ($name == null)
+         {
+             return false;
+         }
+
+         $key = "Lock:{$name}";
+         while (true)
+         {
+             // SETNX has no expiry argument, so SET with NX and EX is used instead.
+             $result = self::_exec('set', array($key, $value, array('nx', 'ex' => $expire)));
+             if ($result === NULL)
+             {
+                 return false;
+             }
+             if ($result != false)
+             {
+                 return true;
+             }
+             usleep($interval);
+         }
+     }
+
+     /**
+      * Release the lock "Lock:{$name}" by deleting its key.
+      *
+      * @param mixed $name Lock name
+      * @return mixed
+      */
+     public static function unlock($name)
+     {
+         $key = "Lock:{$name}";
+         return self::del($key);
+     }
+
+     /**
+      * Read a value.
+      *
+      * @param mixed $key
+      * @return mixed
+      */
+     public static function get($key)
+     {
+         return self::_exec('get', array($key));
+     }
+
+     /**
+      * Delete a key.
+      *
+      * @param mixed $key
+      * @return mixed
+      */
+     public static function del($key)
+     {
+         return self::_exec('del', array($key));
+     }
+
+     /**
+      * Return the type of the value stored at a key as a lowercase name.
+      *
+      * @param mixed $key
+      * @return string|null Type name, or NULL when the key does not exist, the type is
+      *                     not recognised or the command failed
+      */
+     public static function type($key)
+     {
+         // Integer codes returned by phpredis type(); 0 means "key not found".
+         $types = array(
+             1 => 'string',
+             2 => 'set',
+             3 => 'list',
+             4 => 'zset',
+             5 => 'hash',
+             6 => 'stream',
+         );
+
+         $type = self::_exec('type', array($key));
+         if (is_int($type) && isset($types[$type]))
+         {
+             return $types[$type];
+         }
+         return NULL;
+     }
+
+     /**
+      * Increment the integer stored at a key.
+      *
+      * @param mixed $key
+      * @param int   $integer Amount to add; 0 (or empty) adds 1
+      * @return mixed
+      */
+     public static function incr($key, $integer = 0)
+     {
+         if (empty($integer))
+         {
+             return self::_exec('incr', array($key));
+         }
+         return self::_exec('incrby', array($key, $integer));
+     }
+
+     /**
+      * Decrement the integer stored at a key.
+      *
+      * @param mixed $key
+      * @param int   $integer Amount to subtract; 0 (or empty) subtracts 1
+      * @return mixed
+      */
+     public static function decr($key, $integer = 0)
+     {
+         if (empty($integer))
+         {
+             return self::_exec('decr', array($key));
+         }
+         return self::_exec('decrby', array($key, $integer));
+     }
+
+     /**
+      * Append a value to the string stored at a key.
+      *
+      * @param mixed $key
+      * @param mixed $value
+      * @return mixed
+      */
+     public static function append($key, $value)
+     {
+         return self::_exec('append', array($key, $value));
+     }
+
+     /**
+      * Return part of the string stored at a key.
+      *
+      * @param mixed $key
+      * @param mixed $start
+      * @param mixed $end
+      * @return mixed
+      */
+     public static function substr($key, $start, $end)
+     {
+         // getRange is the current phpredis name; substr is a deprecated alias.
+         return self::_exec('getRange', array($key, $start, $end));
+     }
+
+     /**
+      * Switch the current link to another database index.
+      *
+      * @param mixed $index
+      * @return mixed
+      */
+     public static function select($index)
+     {
+         return self::_exec('select', array($index));
+     }
+
+     /**
+      * Return the number of keys in the current database.
+      *
+      * @return mixed
+      */
+     public static function dbsize()
+     {
+         return self::_exec('dbsize');
+     }
+
+     /**
+      * Delete every key in the current database.
+      *
+      * @return mixed
+      */
+     public static function flushdb()
+     {
+         return self::_exec('flushdb');
+     }
+
+     /**
+      * Delete every key in every database.
+      *
+      * @return mixed
+      */
+     public static function flushall()
+     {
+         return self::_exec('flushall');
+     }
+
+     /**
+      * Ask the server to save its data to disk.
+      *
+      * @param mixed $is_bgsave When truthy, save in the background (BGSAVE) instead of SAVE
+      * @return mixed
+      */
+     public static function save($is_bgsave = false)
+     {
+         return self::_exec($is_bgsave ? 'bgsave' : 'save');
+     }
+
+     /**
+      * Return server information and statistics.
+      *
+      * @return mixed
+      */
+     public static function info()
+     {
+         return self::_exec('info');
+     }
+
+     /**
+      * Query the server's slow log.
+      *
+      * @param string $command Slow-log sub-command
+      * @param int    $len     Optional length argument; omitted when empty
+      * @return mixed
+      */
+     public static function slowlog($command = 'get', $len = 0)
+     {
+         if (!empty($len))
+         {
+             return self::_exec('slowlog', array($command, $len));
+         }
+         return self::_exec('slowlog', array($command));
+     }
+
+     /**
+      * Return the Unix timestamp of the last successful save to disk.
+      *
+      * @return mixed
+      */
+     public static function lastsave()
+     {
+         return self::_exec('lastsave');
+     }
+
+     /**
+      * Push a value onto the left end of a list.
+      *
+      * @param mixed $key
+      * @param mixed $value
+      * @return mixed
+      */
+     public static function lpush($key, $value)
+     {
+         return self::_exec('lpush', array($key, $value));
+     }
+
+     /**
+      * Push a value onto the right end of a list.
+      *
+      * @param mixed $key
+      * @param mixed $value
+      * @return mixed
+      */
+     public static function rpush($key, $value)
+     {
+         return self::_exec('rpush', array($key, $value));
+     }
+
+     /**
+      * Remove and return the leftmost element of a list.
+      *
+      * @param mixed $key
+      * @return mixed
+      */
+     public static function lpop($key)
+     {
+         return self::_exec('lpop', array($key));
+     }
+
+     /**
+      * Remove and return the rightmost element of a list.
+      *
+      * @param mixed $key
+      * @return mixed
+      */
+     public static function rpop($key)
+     {
+         return self::_exec('rpop', array($key));
+     }
+
+     /**
+      * Return the length of a list.
+      *
+      * @param mixed $key
+      * @return mixed
+      */
+     public static function lsize($key)
+     {
+         // lLen is the current phpredis name; lSize is a deprecated alias.
+         return self::_exec('lLen', array($key));
+     }
+
+     /**
+      * Return the list element at an index without removing it.
+      *
+      * @param mixed $key
+      * @param int   $index
+      * @return mixed
+      */
+     public static function lget($key, $index = 0)
+     {
+         // lIndex is the current phpredis name; lGet is a deprecated alias.
+         return self::_exec('lIndex', array($key, $index));
+     }
+
+     /**
+      * Return a range of list elements.
+      *
+      * @param mixed $key
+      * @param mixed $start
+      * @param mixed $end
+      * @return mixed
+      */
+     public static function lrange($key, $start, $end)
+     {
+         return self::_exec('lRange', array($key, $start, $end));
+     }
+
+     /**
+      * Pop up to $length elements from the right end of a list.
+      *
+      * @param mixed $key
+      * @param mixed $length Maximum number of elements to pop
+      * @return array|null Popped elements, or NULL when the list is empty or its length
+      *                    could not be read
+      */
+     public static function rlist($key, $length)
+     {
+         $queue_length = self::lsize($key);
+         if (!($queue_length > 0))
+         {
+             return NULL;
+         }
+
+         $list = array();
+         $count = ($queue_length >= $length) ? $length : $queue_length;
+         for ($i = 0; $i < $count; $i++)
+         {
+             $data = self::rpop($key);
+             // false: the list ran dry meanwhile; NULL: the command failed.
+             if ($data === false || $data === NULL)
+             {
+                 continue;
+             }
+             $list[] = $data;
+         }
+         return $list;
+     }
+
+     /**
+      * Find keys matching a glob-style pattern.
+      *
+      * Pattern examples: "*" matches every key, "h?llo" matches hello/hallo/hxllo,
+      * "h*llo" matches hllo and heeeeello, "h[ae]llo" matches hello and hallo but not
+      * hillo. Escape special characters with "\".
+      * init() sets OPT_PREFIX on the link, so this does not list every key in Redis;
+      * that would require removing the prefix.
+      *
+      * @param mixed $key Pattern
+      * @return mixed
+      */
+     public static function keys($key)
+     {
+         return self::_exec('keys', array($key));
+     }
+
+     /**
+      * Return the remaining time to live of a key.
+      * Positive: seconds left; -1: never expires; -2: key does not exist.
+      *
+      * @param mixed $key
+      * @return mixed
+      */
+     public static function ttl($key)
+     {
+         return self::_exec('ttl', array($key));
+     }
+
+     /**
+      * Set the time to live of a key.
+      *
+      * @param mixed $key
+      * @param mixed $expire Time to live in seconds
+      * @return mixed
+      */
+     public static function expire($key, $expire)
+     {
+         return self::_exec('expire', array($key, $expire));
+     }
+
+     /**
+      * Check whether a key exists.
+      *
+      * @param mixed $key
+      * @return mixed The phpredis result, or false when there is no connection or the
+      *               command failed
+      */
+     public static function exists($key)
+     {
+         return self::_exec('exists', array($key), false);
+     }
+
+     /**
+      * JSON-encode a value without escaping multi-byte characters.
+      *
+      * @param mixed $value
+      * @return string|false
+      */
+     public static function encode($value)
+     {
+         return json_encode($value, JSON_UNESCAPED_UNICODE);
+     }
+
+     /**
+      * JSON-decode a string, returning objects as associative arrays.
+      *
+      * @param string $value
+      * @return mixed
+      */
+     public static function decode($value)
+     {
+         return json_decode($value, true);
+     }
+ }
+
+
diff --git a/vendor/owner888/phpspider/library/cls_redis_client.php b/vendor/owner888/phpspider/library/cls_redis_client.php
new file mode 100644
index 0000000..890d928
--- /dev/null
+++ b/vendor/owner888/phpspider/library/cls_redis_client.php
@@ -0,0 +1,121 @@
+ <?php
+ /**
+  * @created time :2018-01-03
+  */
+ /**
+  * Minimal Redis client that talks the Redis wire protocol over a plain stream socket.
+  *
+  * Any method called on an instance is sent as a command: the method name followed by
+  * its arguments, e.g. $client->set('name', 'abc').
+  */
+ class cls_redis_client
+ {
+     /**
+      * Stream socket to the server, or false when the connection was never opened.
+      */
+     private $redis_socket = false;
+
+     /**
+      * Open the connection.
+      *
+      * @param string $host    Server host
+      * @param int    $port    Server port
+      * @param int    $timeout Connect timeout in seconds
+      * @throws Exception When the connection cannot be opened
+      */
+     public function __construct($host='127.0.0.1', $port=6379, $timeout = 3)
+     {
+         $this->redis_socket = stream_socket_client("tcp://".$host.":".$port, $errno, $errstr, $timeout);
+         if ( !$this->redis_socket )
+         {
+             throw new Exception("{$errno} - {$errstr}");
+         }
+     }
+
+     /**
+      * Close the connection if one was opened.
+      */
+     public function __destruct()
+     {
+         // The socket is still false when the constructor threw.
+         if (is_resource($this->redis_socket))
+         {
+             fclose($this->redis_socket);
+         }
+     }
+
+     /**
+      * Send a command and return its reply.
+      *
+      * @param string $name Command name
+      * @param array  $args Command arguments
+      * @return mixed See read_response()
+      * @throws Exception When the command cannot be written or the reply cannot be read
+      */
+     public function __call($name, $args)
+     {
+         $crlf = "\r\n";
+         array_unshift($args, $name);
+         $command = '*' . count($args) . $crlf;
+         foreach ($args as $arg)
+         {
+             $command .= '$' . strlen($arg) . $crlf . $arg . $crlf;
+         }
+
+         // A write to a network stream may stop early, so keep writing the remainder.
+         $total = strlen($command);
+         $written = 0;
+         while ($written < $total)
+         {
+             $bytes = fwrite($this->redis_socket, substr($command, $written));
+             if ($bytes === FALSE || $bytes <= 0)
+             {
+                 throw new Exception('Failed to write entire command to stream');
+             }
+             $written += $bytes;
+         }
+         return $this->read_response();
+     }
+
+     /**
+      * Read exactly $length bytes from the socket.
+      *
+      * @param int $length Number of bytes wanted
+      * @return string
+      * @throws Exception When the stream fails or ends before $length bytes arrived
+      */
+     private function read_bytes($length)
+     {
+         $data = '';
+         $read = 0;
+         while ($read < $length)
+         {
+             $block_size = ($length - $read) > 1024 ? 1024 : ($length - $read);
+             $r = fread($this->redis_socket, $block_size);
+             // An empty string means the peer closed the stream; without this check
+             // the loop would never finish.
+             if ($r === FALSE || $r === '')
+             {
+                 throw new Exception('Failed to read response from stream');
+             }
+             $read += strlen($r);
+             $data .= $r;
+         }
+         return $data;
+     }
+
+     /**
+      * Read and decode one reply from the socket.
+      *
+      * @return mixed TRUE for a "+OK" status, the status text for other "+" replies,
+      *               a string for a bulk reply (NULL for a nil bulk), an int for an
+      *               integer reply, an array for a multi-bulk reply (NULL for a nil one)
+      * @throws Exception For an error reply, an unknown reply type or a read failure
+      */
+     private function read_response()
+     {
+         // No length limit: a capped read would cut long lines and desync the stream.
+         $line = fgets($this->redis_socket);
+         if ($line === FALSE)
+         {
+             throw new Exception('Failed to read response from stream');
+         }
+         $reply = trim($line);
+
+         switch (substr($reply, 0, 1))
+         {
+             /* Error reply */
+             case '-':
+                 throw new Exception(trim(substr($reply, 1)));
+             /* Status reply */
+             case '+':
+                 $response = substr($reply, 1);
+                 if ($response === 'OK')
+                 {
+                     $response = TRUE;
+                 }
+                 break;
+             /* Bulk reply */
+             case '$':
+                 if ($reply == '$-1')
+                 {
+                     $response = NULL;
+                     break;
+                 }
+                 $size = intval(substr($reply, 1));
+                 // A zero-length bulk is an empty string, not nil.
+                 $response = $size > 0 ? $this->read_bytes($size) : '';
+                 $this->read_bytes(2); /* discard crlf */
+                 break;
+             /* Multi-bulk reply */
+             case '*':
+                 $count = intval(substr($reply, 1));
+                 if ($count < 0)
+                 {
+                     return NULL;
+                 }
+                 $response = array();
+                 for ($i = 0; $i < $count; $i++)
+                 {
+                     $response[] = $this->read_response();
+                 }
+                 break;
+             /* Integer reply */
+             case ':':
+                 $response = intval(substr($reply, 1));
+                 break;
+             default:
+                 // Plain Exception: RedisException only exists with the redis extension.
+                 throw new Exception("Unknown response: {$reply}");
+         }
+         return $response;
+     }
+ }
+
+
+//$redis = new cls_redis_client();
+//var_dump($redis->auth("foobared"));
+//var_dump($redis->set("name",'abc'));
+//var_dump($redis->get("name"));
+
diff --git a/vendor/owner888/phpspider/library/cls_redis_server.php b/vendor/owner888/phpspider/library/cls_redis_server.php
new file mode 100644
index 0000000..a206450
--- /dev/null
+++ b/vendor/owner888/phpspider/library/cls_redis_server.php
@@ -0,0 +1,179 @@
+ <?php
+ /**
+  * @created time :2018-01-03
+  */
+ /**
+  * Small pre-forking TCP server that parses Redis-protocol requests and hands each one
+  * to the $onMessage callback.
+  *
+  * run() forks $process_num workers that all accept on the same listening socket.
+  * Each worker is a separate process, so whatever a callback stores in $redis_kv_data
+  * lives in that worker's memory only.
+  */
+ class cls_redis_server
+ {
+     /**
+      * Listening socket.
+      */
+     private $socket = false;
+
+     /**
+      * Number of worker processes to keep running.
+      */
+     private $process_num = 3;
+
+     /**
+      * Worker process ids, keyed by pid.
+      */
+     public $pids = array();
+
+     /**
+      * Key/value store available to the message callback.
+      */
+     public $redis_kv_data = array();
+
+     /**
+      * Callback invoked as ($conn, $request) for every parsed request.
+      */
+     public $onMessage = null;
+
+     /**
+      * Start listening; terminates the script when the socket cannot be created.
+      *
+      * @param string $host Address to bind to
+      * @param int    $port Port to listen on
+      */
+     public function __construct($host="0.0.0.0", $port=6379)
+     {
+         $this->socket = stream_socket_server("tcp://".$host.":".$port,$errno, $errstr);
+         if (!$this->socket) die($errstr."--".$errno);
+         echo "listen $host $port \r\n";
+     }
+
+     /**
+      * Read and decode one protocol element from a connection.
+      *
+      * All lengths are byte counts, so only byte-wise string functions are used here.
+      *
+      * @param resource $conn Client connection
+      * @return mixed An array for "*" (multi-bulk), a string for "$" (bulk, null for a
+      *               nil bulk), the line without its first character for anything else,
+      *               or null when nothing could be read
+      * @throws Exception When the connection ends in the middle of a bulk string
+      */
+     private function parse_resp(&$conn)
+     {
+         // One line, terminated by \r\n
+         $line = fgets($conn);
+         if($line === '' || $line === false)
+         {
+             return null;
+         }
+         // The first character is the element type
+         $type = $line[0];
+         // Drop the type character and the line terminator
+         $line = rtrim(substr($line, 1), "\r\n");
+         switch ( $type )
+         {
+             case "*":
+                 // Number of elements that follow
+                 $count = (int) $line;
+                 $data = array();
+                 for ($i = 1; $i <= $count; $i++)
+                 {
+                     $data[] = $this->parse_resp($conn);
+                 }
+                 return $data;
+             case "$":
+                 $size = (int) $line;
+                 if ($size < 0)
+                 {
+                     return null;
+                 }
+                 // The payload is followed by \r\n, so read two extra bytes
+                 $length = $size + 2;
+                 $data = '';
+                 while ($length > 0)
+                 {
+                     // A socket read may return fewer bytes than requested; only an
+                     // empty or failed read means the payload can no longer arrive.
+                     $block = fread($conn, $length);
+                     if ($block === false || $block === '')
+                     {
+                         throw new Exception('RECEIVING');
+                     }
+                     $data .= $block;
+                     $length -= strlen($block);
+                 }
+                 return substr($data, 0, -2);
+         }
+         return $line;
+     }
+
+     /**
+      * Serve one client connection until it closes or sends something unparsable.
+      *
+      * @param resource $conn Client connection
+      * @return void
+      */
+     private function serve_connection($conn)
+     {
+         //"*3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nmyvalue\r\n"
+         while ( true )
+         {
+             try
+             {
+                 $arr = $this->parse_resp($conn);
+             }
+             catch (Exception $e)
+             {
+                 // Drop only this connection; the worker keeps serving others.
+                 $arr = null;
+             }
+
+             if ( !is_array($arr) && !$arr )
+             {
+                 fclose($conn);
+                 return;
+             }
+             if ($this->onMessage)
+             {
+                 call_user_func($this->onMessage, $conn, $arr);
+             }
+         }
+     }
+
+     /**
+      * Fork one worker. The child accepts and serves connections forever; the parent
+      * records the child's pid and returns.
+      *
+      * @return void
+      */
+     private function start_worker_process()
+     {
+         $pid = pcntl_fork();
+         if ($pid === -1)
+         {
+             echo "fork error : " . pcntl_strerror(pcntl_get_last_error()) . " \r\n";
+             exit;
+         }
+
+         if ($pid === 0)
+         {
+             // Child process: never returns.
+             while ( true )
+             {
+                 echo "PID ".posix_getpid()." waiting...\n";
+                 // Block until a client connects
+                 $conn = stream_socket_accept($this->socket, -1);
+                 if ( !$conn )
+                 {
+                     continue;
+                 }
+                 $this->serve_connection($conn);
+             }
+         }
+
+         $this->pids[$pid] = $pid;
+     }
+
+     /**
+      * Start the workers and restart any worker that exits. Never returns.
+      *
+      * @return void
+      */
+     public function run()
+     {
+         for($i = 1; $i <= $this->process_num; $i++)
+         {
+             $this->start_worker_process();
+         }
+
+         while( true )
+         {
+             foreach ($this->pids as $pid)
+             {
+                 if (!$pid)
+                 {
+                     continue;
+                 }
+                 $res = pcntl_waitpid($pid, $status,WNOHANG);
+                 if ( $res == -1 || $res > 0 )
+                 {
+                     // Forget the old pid before forking, so a replacement that is
+                     // given the same pid is not removed again.
+                     unset($this->pids[$pid]);
+                     $this->start_worker_process();
+                 }
+             }
+             sleep(1);
+         }
+     }
+
+ }
+
+ // Start a server on the default address and answer SET/GET from an in-memory array.
+ // Every other request is acknowledged with +OK.
+ // NOTE: this runs as soon as the file is loaded, and each worker process keeps its
+ // own copy of redis_kv_data, so a value set through one worker is not visible to
+ // a connection served by another worker.
+ $server = new cls_redis_server();
+ $server->onMessage = function($conn, $info) use($server)
+ {
+     if ( !is_array($info) )
+     {
+         fwrite($conn,"+OK\r\n");
+         return;
+     }
+
+     $command = (isset($info[0]) && is_string($info[0])) ? strtoupper($info[0]) : '';
+     if ( $command == "SET" )
+     {
+         if ( !isset($info[1], $info[2]) )
+         {
+             fwrite($conn, "-ERR wrong number of arguments for 'set' command\r\n");
+             return;
+         }
+         $server->redis_kv_data[$info[1]] = $info[2];
+         fwrite($conn, "+OK\r\n");
+     }
+     else if ( $command == "GET" )
+     {
+         if ( !isset($info[1]) )
+         {
+             fwrite($conn, "-ERR wrong number of arguments for 'get' command\r\n");
+             return;
+         }
+         $key = $info[1];
+         if ( isset($server->redis_kv_data[$key]) )
+         {
+             $val = $server->redis_kv_data[$key];
+             fwrite($conn, "$".strlen($val)."\r\n".$val."\r\n");
+         }
+         else
+         {
+             // Nil bulk reply: lets the client tell a missing key from an empty string.
+             fwrite($conn, "$-1\r\n");
+         }
+     }
+     else
+     {
+         fwrite($conn,"+OK\r\n");
+     }
+ };
+ $server->run();
diff --git a/vendor/owner888/phpspider/library/phpquery.php b/vendor/owner888/phpspider/library/phpquery.php
new file mode 100644
index 0000000..d4be6e4
--- /dev/null
+++ b/vendor/owner888/phpspider/library/phpquery.php
@@ -0,0 +1,5727 @@
+
+ * @license http://www.opensource.org/licenses/mit-license.php MIT License
+ * @package phpQuery
+ */
+
+namespace phpspider\library;
+use DOMDocument;
+use DOMXpath;
+use Exception;
+
+// class names for instanceof
+// TODO move them as class constants into phpQuery
+// NOTE(review): `instanceof` takes a class name, not a constant, so checks such
+// as `$x instanceof DOMNODELIST` are resolved as class names inside this
+// namespace rather than through these constants. Only DOMDocument, DOMXpath and
+// Exception are imported by the `use` statements above — confirm the other
+// checks behave as intended.
+define('DOMDOCUMENT', 'DOMDocument');
+define('DOMELEMENT', 'DOMElement');
+define('DOMNODELIST', 'DOMNodeList');
+define('DOMNODE', 'DOMNode');
+
+/**
+ * Plain event object handed to phpQuery event handlers.
+ *
+ * Modelled on the DOM event interface.
+ * @link http://developer.mozilla.org/En/DOM:event
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ * @todo implement ArrayAccess ?
+ */
+class DOMEvent {
+
+    /**
+     * Whether the event keeps bubbling up through the DOM.
+     * Cleared by stopPropagation().
+     *
+     * @var bool
+     */
+    public $bubbles = true;
+    /**
+     * Whether the event is cancelable.
+     *
+     * @var bool
+     */
+    public $cancelable = true;
+    /**
+     * Currently registered target for the event.
+     *
+     * @var mixed
+     */
+    public $currentTarget;
+    /**
+     * Event detail, depending on the type of event.
+     *
+     * @var mixed
+     * @link http://developer.mozilla.org/en/DOM/event.detail
+     */
+    public $detail; // ???
+    /**
+     * Phase of the event flow currently being evaluated.
+     *
+     * NOT IMPLEMENTED
+     *
+     * @var mixed
+     * @link http://developer.mozilla.org/en/DOM/event.eventPhase
+     */
+    public $eventPhase; // ???
+    /**
+     * Explicit original target of the event (Mozilla-specific).
+     *
+     * NOT IMPLEMENTED
+     *
+     * @var mixed
+     */
+    public $explicitOriginalTarget; // moz only
+    /**
+     * Original target of the event, before any retargetings (Mozilla-specific).
+     *
+     * NOT IMPLEMENTED
+     *
+     * @var mixed
+     */
+    public $originalTarget; // moz only
+    /**
+     * Secondary target for the event.
+     *
+     * @var mixed
+     */
+    public $relatedTarget;
+    /**
+     * Target to which the event was originally dispatched.
+     *
+     * @var mixed
+     */
+    public $target;
+    /**
+     * Time the event was created; defaults to time() when not supplied.
+     *
+     * @var int
+     */
+    public $timeStamp;
+    /**
+     * Name of the event (case-insensitive).
+     */
+    public $type;
+    /** Cleared by preventDefault(). */
+    public $runDefault = true;
+    /** Free-form payload. */
+    public $data = null;
+
+    /**
+     * Copy every key of $data onto the property of the same name.
+     *
+     * @param array $data Property name => value pairs.
+     */
+    public function __construct($data) {
+        foreach ($data as $property => $value) {
+            $this->{$property} = $value;
+        }
+        if ($this->timeStamp) {
+            return;
+        }
+        $this->timeStamp = time();
+    }
+    /**
+     * Marks the default action as cancelled.
+     */
+    public function preventDefault() {
+        $this->runDefault = false;
+    }
+    /**
+     * Stops the event from propagating further along the DOM.
+     */
+    public function stopPropagation() {
+        $this->bubbles = false;
+    }
+}
+
+
+/**
+ * DOMDocumentWrapper class simplifies work with DOMDocument.
+ *
+ * Known bug:
+ * - in XHTML fragments,
changes to
+ *
+ * @todo check XML catalogs compatibility
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ */
+class DOMDocumentWrapper {
+ /**
+ * @var DOMDocument
+ */
+ public $document;
+ public $id;
+ /**
+ * @todo Rewrite as method and quess if null.
+ * @var unknown_type
+ */
+ public $contentType = '';
+ public $xpath;
+ public $uuid = 0;
+ public $data = array();
+ public $dataNodes = array();
+ public $events = array();
+ public $eventsNodes = array();
+ public $eventsGlobal = array();
+ /**
+ * @TODO iframes support http://code.google.com/p/phpquery/issues/detail?id=28
+ * @var unknown_type
+ */
+ public $frames = array();
+ /**
+ * Document root, by default equals to document itself.
+ * Used by documentFragments.
+ *
+ * @var DOMNode
+ */
+ public $root;
+ public $isDocumentFragment;
+ public $isXML = false;
+ public $isXHTML = false;
+ public $isHTML = false;
+ public $charset;
+ /**
+  * Optionally load markup right away and assign the wrapper's id.
+  *
+  * @param mixed       $markup        Markup (or document) passed to load(); skipped when null.
+  * @param string|null $contentType   Passed through to load().
+  * @param string|null $newDocumentID Used as the id when truthy; otherwise an
+  *                                   md5 of microtime() is generated.
+  */
+ public function __construct($markup = null, $contentType = null, $newDocumentID = null) {
+     if ($markup !== null) {
+         $this->load($markup, $contentType, $newDocumentID);
+     }
+     if ($newDocumentID) {
+         $this->id = $newDocumentID;
+     } else {
+         $this->id = md5(microtime());
+     }
+ }
+ /**
+  * Load a document from markup or adopt an existing DOMDocument.
+  *
+  * On success the XPath helper is created and afterMarkupLoad() is run.
+  *
+  * @param string|DOMDocument $markup        Markup string, or a ready DOMDocument to wrap.
+  * @param string|null        $contentType   Content type; stored lower-cased.
+  * @param string|null        $newDocumentID Unused here.
+  * @return bool True when a document is available, false otherwise.
+  */
+ public function load($markup, $contentType = null, $newDocumentID = null) {
+     // Cast first so a null content type is not passed to strtolower().
+     $this->contentType = strtolower((string) $contentType);
+     if ($markup instanceof DOMDocument) {
+         $this->document = $markup;
+         $this->root = $this->document;
+         $this->charset = $this->document->encoding;
+         // An adopted document counts as loaded; previously $loaded was left
+         // undefined on this path, so xpath was never set up.
+         $loaded = true;
+         // TODO isDocumentFragment
+     } else {
+         $loaded = $this->loadMarkup($markup);
+     }
+     if (! $loaded) {
+         return false;
+     }
+     $this->document->preserveWhiteSpace = true;
+     $this->xpath = new DOMXPath($this->document);
+     $this->afterMarkupLoad();
+     return true;
+ }
+ /**
+  * Post-load hook: registers the "html" XPath namespace prefix for XHTML documents.
+  */
+ protected function afterMarkupLoad() {
+     if (! $this->isXHTML) {
+         return;
+     }
+     $this->xpath->registerNamespace('html', 'http://www.w3.org/1999/xhtml');
+ }
+ /**
+  * Load markup with the HTML or the XML loader.
+  *
+  * When a content type is set it decides the loader; otherwise the markup
+  * itself is inspected via isXML().
+  *
+  * @param string $markup
+  * @return bool Result of the chosen loader, false when no loader applied.
+  */
+ protected function loadMarkup($markup) {
+     $loaded = false;
+     if ($this->contentType) {
+         self::debug("Load markup for content type {$this->contentType}");
+         // content determined by contentType
+         list($contentType, $charset) = $this->contentTypeToArray($this->contentType);
+         switch($contentType) {
+             case 'text/html':
+                 phpQuery::debug("Loading HTML, content type '{$this->contentType}'");
+                 $loaded = $this->loadMarkupHTML($markup, $charset);
+                 break;
+             case 'text/xml':
+             case 'application/xhtml+xml':
+                 phpQuery::debug("Loading XML, content type '{$this->contentType}'");
+                 $loaded = $this->loadMarkupXML($markup, $charset);
+                 break;
+             default:
+                 // for feeds or anything that sometimes doesn't use text/xml;
+                 // the haystack is the content type, the needle is 'xml'
+                 if (strpos($this->contentType, 'xml') !== false) {
+                     phpQuery::debug("Loading XML, content type '{$this->contentType}'");
+                     $loaded = $this->loadMarkupXML($markup, $charset);
+                 } else
+                     phpQuery::debug("Could not determine document type from content type '{$this->contentType}'");
+         }
+     } else {
+         // content type autodetection
+         if ($this->isXML($markup)) {
+             phpQuery::debug("Loading XML, isXML() == true");
+             $loaded = $this->loadMarkupXML($markup);
+             if (! $loaded && $this->isXHTML) {
+                 phpQuery::debug('Loading as XML failed, trying to load as HTML, isXHTML == true');
+                 $loaded = $this->loadMarkupHTML($markup);
+             }
+         } else {
+             phpQuery::debug("Loading HTML, isXML() == false");
+             $loaded = $this->loadMarkupHTML($markup);
+         }
+     }
+     return $loaded;
+ }
+ /**
+  * Clears the three document-kind flags before a loader sets its own.
+  */
+ protected function loadMarkupReset() {
+     $this->isHTML = false;
+     $this->isXHTML = false;
+     $this->isXML = false;
+ }
+ /**
+  * Creates a fresh DOMDocument and mirrors its encoding into $this->charset.
+  *
+  * @param string $charset Encoding handed to the DOMDocument constructor.
+  * @param string $version XML version; any falsy value falls back to '1.0'.
+  */
+ protected function documentCreate($charset, $version = '1.0') {
+     $document = new DOMDocument($version ? $version : '1.0', $charset);
+     $this->document = $document;
+     $this->charset = $document->encoding;
+     $document->formatOutput = true;
+     $document->preserveWhiteSpace = true;
+ }
+ /**
+  * Loads $markup as HTML, either as a full document or as a fragment.
+  *
+  * Charset precedence for the created document: charset found in the markup,
+  * then $requestedCharset, then phpQuery::$defaultCharset. When the requested
+  * and the document charset differ and mbstring is available, the markup is
+  * converted to the requested charset.
+  *
+  * @param string      $markup
+  * @param string|null $requestedCharset
+  * @return mixed Result of the fragment loader or of DOMDocument::loadHTML().
+  */
+ protected function loadMarkupHTML($markup, $requestedCharset = null) {
+ if (phpQuery::$debug)
+ phpQuery::debug('Full markup load (HTML): '.substr($markup, 0, 250));
+ $this->loadMarkupReset();
+ $this->isHTML = true;
+ // Fragment detection only runs when the flag has not been set by a caller.
+ if (!isset($this->isDocumentFragment))
+ $this->isDocumentFragment = self::isDocumentFragmentHTML($markup);
+ $charset = null;
+ $documentCharset = $this->charsetFromHTML($markup);
+ $addDocumentCharset = false;
+ if ($documentCharset) {
+ $charset = $documentCharset;
+ $markup = $this->charsetFixHTML($markup);
+ } else if ($requestedCharset) {
+ $charset = $requestedCharset;
+ }
+ if (! $charset)
+ $charset = phpQuery::$defaultCharset;
+ // HTTP 1.1 says that the default charset is ISO-8859-1
+ // @see http://www.w3.org/International/O-HTTP-charset
+ if (! $documentCharset) {
+ $documentCharset = 'ISO-8859-1';
+ $addDocumentCharset = true;
+ }
+ // Should be careful here, still need 'magic encoding detection' since lots of pages have other 'default encoding'
+ // Worse, some pages can have mixed encodings... we'll try not to worry about that
+ // Upper-cased so the strict comparison below is case-insensitive.
+ $requestedCharset = strtoupper($requestedCharset);
+ $documentCharset = strtoupper($documentCharset);
+ phpQuery::debug("DOC: $documentCharset REQ: $requestedCharset");
+ if ($requestedCharset && $documentCharset && $requestedCharset !== $documentCharset) {
+ phpQuery::debug("CHARSET CONVERT");
+ // Document Encoding Conversion
+ // http://code.google.com/p/phpquery/issues/detail?id=86
+ if (function_exists('mb_detect_encoding')) {
+ $possibleCharsets = array($documentCharset, $requestedCharset, 'AUTO');
+ $docEncoding = mb_detect_encoding($markup, implode(', ', $possibleCharsets));
+ if (! $docEncoding)
+ $docEncoding = $documentCharset; // ok trust the document
+ phpQuery::debug("DETECTED '$docEncoding'");
+ // Detected does not match what document says...
+ if ($docEncoding !== $documentCharset) {
+ // Tricky..
+ }
+ if ($docEncoding !== $requestedCharset) {
+ phpQuery::debug("CONVERT $docEncoding => $requestedCharset");
+ $markup = mb_convert_encoding($markup, $requestedCharset, $docEncoding);
+ $markup = $this->charsetAppendToHTML($markup, $requestedCharset);
+ $charset = $requestedCharset;
+ }
+ } else {
+ phpQuery::debug("TODO: charset conversion without mbstring...");
+ }
+ }
+ $return = false;
+ if ($this->isDocumentFragment) {
+ phpQuery::debug("Full markup load (HTML), DocumentFragment detected, using charset '$charset'");
+ $return = $this->documentFragmentLoadMarkup($this, $charset, $markup);
+ } else {
+ if ($addDocumentCharset) {
+ phpQuery::debug("Full markup load (HTML), appending charset: '$charset'");
+ $markup = $this->charsetAppendToHTML($markup, $charset);
+ }
+ phpQuery::debug("Full markup load (HTML), documentCreate('$charset')");
+ $this->documentCreate($charset);
+ // Parser warnings are only left visible at debug level 2.
+ $return = phpQuery::$debug === 2
+ ? $this->document->loadHTML($markup)
+ : @$this->document->loadHTML($markup);
+ if ($return)
+ $this->root = $this->document;
+ }
+ // Record the content type only when none was supplied.
+ if ($return && ! $this->contentType)
+ $this->contentType = 'text/html';
+ return $return;
+ }
+ /**
+  * Loads $markup as XML (or XHTML), either as a full document or as a fragment.
+  *
+  * @param string      $markup
+  * @param string|null $requestedCharset
+  * @return mixed Truthy loader result on success.
+  * @throws Exception When the markup could not be loaded.
+  */
+ protected function loadMarkupXML($markup, $requestedCharset = null) {
+ if (phpQuery::$debug)
+ phpQuery::debug('Full markup load (XML): '.substr($markup, 0, 250));
+ $this->loadMarkupReset();
+ $this->isXML = true;
+ // check agains XHTML in contentType or markup
+ $isContentTypeXHTML = $this->isXHTML();
+ $isMarkupXHTML = $this->isXHTML($markup);
+ if ($isContentTypeXHTML || $isMarkupXHTML) {
+ self::debug('Full markup load (XML), XHTML detected');
+ $this->isXHTML = true;
+ }
+ // determine document fragment
+ if (! isset($this->isDocumentFragment))
+ $this->isDocumentFragment = $this->isXHTML
+ ? self::isDocumentFragmentXHTML($markup)
+ : self::isDocumentFragmentXML($markup);
+ // this charset will be used
+ $charset = null;
+ // charset from XML declaration @var string
+ $documentCharset = $this->charsetFromXML($markup);
+ if (! $documentCharset) {
+ if ($this->isXHTML) {
+ // this is XHTML, try to get charset from content-type meta header
+ $documentCharset = $this->charsetFromHTML($markup);
+ if ($documentCharset) {
+ phpQuery::debug("Full markup load (XML), appending XHTML charset '$documentCharset'");
+ // NOTE(review): charsetAppendToXML() returns the new markup and the
+ // result is discarded here, so $markup is unchanged — confirm intent.
+ $this->charsetAppendToXML($markup, $documentCharset);
+ $charset = $documentCharset;
+ }
+ }
+ if (! $documentCharset) {
+ // if still no document charset...
+ $charset = $requestedCharset;
+ }
+ } else if ($requestedCharset) {
+ $charset = $requestedCharset;
+ }
+ if (! $charset) {
+ $charset = phpQuery::$defaultCharset;
+ }
+ if ($requestedCharset && $documentCharset && $requestedCharset != $documentCharset) {
+ // TODO place for charset conversion
+// $charset = $requestedCharset;
+ }
+ $return = false;
+ if ($this->isDocumentFragment) {
+ phpQuery::debug("Full markup load (XML), DocumentFragment detected, using charset '$charset'");
+ $return = $this->documentFragmentLoadMarkup($this, $charset, $markup);
+ } else {
+ // FIXME ???
+ if ($isContentTypeXHTML && ! $isMarkupXHTML)
+ if (! $documentCharset) {
+ phpQuery::debug("Full markup load (XML), appending charset '$charset'");
+ $markup = $this->charsetAppendToXML($markup, $charset);
+ }
+ // see http://pl2.php.net/manual/en/book.dom.php#78929
+ // LIBXML_DTDLOAD (>= PHP 5.1)
+ // does XML ctalogues works with LIBXML_NONET
+ // $this->document->resolveExternals = true;
+ // TODO test LIBXML_COMPACT for performance improvement
+ // create document
+ $this->documentCreate($charset);
+ // NOTE(review): compares the version string against a float; version_compare()
+ // would be the robust form — left untouched here.
+ if (phpversion() < 5.1) {
+ $this->document->resolveExternals = true;
+ $return = phpQuery::$debug === 2
+ ? $this->document->loadXML($markup)
+ : @$this->document->loadXML($markup);
+ } else {
+ /** @link http://pl2.php.net/manual/en/libxml.constants.php */
+ // Warnings and errors are only left enabled at debug level 2.
+ $libxmlStatic = phpQuery::$debug === 2
+ ? LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET
+ : LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOERROR;
+ $return = $this->document->loadXML($markup, $libxmlStatic);
+// if (! $return)
+// $return = $this->document->loadHTML($markup);
+ }
+ if ($return)
+ $this->root = $this->document;
+ }
+ if ($return) {
+ // Record the content type only when none was supplied.
+ if (! $this->contentType) {
+ if ($this->isXHTML)
+ $this->contentType = 'application/xhtml+xml';
+ else
+ $this->contentType = 'text/xml';
+ }
+ return $return;
+ } else {
+ throw new Exception("Error loading XML markup");
+ }
+ }
+ protected function isXHTML($markup = null) {
+ if (! isset($markup)) {
+ return strpos($this->contentType, 'xhtml') !== false;
+ }
+ // XXX ok ?
+ return strpos($markup, "doctype) && is_object($dom->doctype)
+// ? $dom->doctype->publicId
+// : self::$defaultDoctype;
+ }
+ protected function isXML($markup) {
+// return strpos($markup, ']+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i',
+ $markup, $matches
+ );
+ if (! isset($matches[0]))
+ return array(null, null);
+ // get attr 'content'
+ preg_match('@content\\s*=\\s*(["|\'])(.+?)\\1@', $matches[0], $matches);
+ if (! isset($matches[0]))
+ return array(null, null);
+ return $this->contentTypeToArray($matches[2]);
+ }
+ /**
+  * Returns the second element (the charset) of contentTypeFromHTML()'s result.
+  *
+  * @param string $markup
+  * @return string|null
+  */
+ protected function charsetFromHTML($markup) {
+     $parts = $this->contentTypeFromHTML($markup);
+     $charset = $parts[1];
+     return $charset;
+ }
+ /**
+  * Extracts the encoding named in the markup's XML declaration.
+  *
+  * @param string $markup
+  * @return string|null Lower-cased encoding, or null when none is declared.
+  */
+ protected function charsetFromXML($markup) {
+     // Initialise explicitly; the former bare `$matches;` statement read an
+     // undefined variable.
+     $matches = array();
+     // find declaration
+     preg_match('@<'.'?xml[^>]+encoding\\s*=\\s*(["|\'])(.*?)\\1@i',
+         $markup, $matches
+     );
+     return isset($matches[2])
+         ? strtolower($matches[2])
+         : null;
+ }
+ /**
+ * Repositions meta[type=charset] at the start of head. Bypasses DOMDocument bug.
+ *
+ * @link http://code.google.com/p/phpquery/issues/detail?id=80
+ * @param $html
+ */
+ protected function charsetFixHTML($markup) {
+ $matches = array();
+ // find meta tag
+ preg_match('@\s*]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i',
+ $markup, $matches, PREG_OFFSET_CAPTURE
+ );
+ if (! isset($matches[0]))
+ return;
+ $metaContentType = $matches[0][0];
+ $markup = substr($markup, 0, $matches[0][1])
+ .substr($markup, $matches[0][1]+strlen($metaContentType));
+ $headStart = stripos($markup, '');
+ $markup = substr($markup, 0, $headStart+6).$metaContentType
+ .substr($markup, $headStart+6);
+ return $markup;
+ }
+ protected function charsetAppendToHTML($html, $charset, $xhtml = false) {
+ // remove existing meta[type=content-type]
+ $html = preg_replace('@\s*]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i', '', $html);
+ $meta = '';
+ if (strpos($html, ')@s',
+ "{$meta}",
+ $html
+ );
+ }
+ } else {
+ return preg_replace(
+ '@)@s',
+ ''.$meta,
+ $html
+ );
+ }
+ }
+ /**
+  * Prepends an XML 1.0 declaration carrying $charset to $markup.
+  *
+  * @param string $markup
+  * @param string $charset
+  * @return string
+  */
+ protected function charsetAppendToXML($markup, $charset) {
+     // The "<?" and "?>" sequences are split so they never appear literally in the source.
+     return '<'.'?xml version="1.0" encoding="'.$charset.'"?'.'>'.$markup;
+ }
+ public static function isDocumentFragmentHTML($markup) {
+ return stripos($markup, 'documentFragmentCreate($node, $sourceCharset);
+// if ($fake === false)
+// throw new Exception("Error loading documentFragment markup");
+// else
+// $return = array_merge($return,
+// $this->import($fake->root->childNodes)
+// );
+// } else {
+// $return[] = $this->document->importNode($node, true);
+// }
+// }
+// return $return;
+// } else {
+// // string markup
+// $fake = $this->documentFragmentCreate($source, $sourceCharset);
+// if ($fake === false)
+// throw new Exception("Error loading documentFragment markup");
+// else
+// return $this->import($fake->root->childNodes);
+// }
+ if (is_array($source) || $source instanceof DOMNODELIST) {
+ // dom nodes
+ self::debug('Importing nodes to document');
+ foreach($source as $node)
+ $return[] = $this->document->importNode($node, true);
+ } else {
+ // string markup
+ $fake = $this->documentFragmentCreate($source, $sourceCharset);
+ if ($fake === false)
+ throw new Exception("Error loading documentFragment markup");
+ else
+ return $this->import($fake->root->childNodes);
+ }
+ return $return;
+ }
+ /**
+  * Creates a new document fragment wrapper from nodes or from markup.
+  *
+  * The fragment inherits this wrapper's content type and document-kind flags.
+  *
+  * @param \DOMNode|\DOMNodeList|array|string $source  Nodes to import, or markup to parse.
+  * @param string|null                        $charset Defaults to this wrapper's charset.
+  * @return DOMDocumentWrapper|false The fragment, or false when it could not be loaded.
+  */
+ protected function documentFragmentCreate($source, $charset = null) {
+     $fake = new DOMDocumentWrapper();
+     $fake->contentType = $this->contentType;
+     $fake->isXML = $this->isXML;
+     $fake->isHTML = $this->isHTML;
+     $fake->isXHTML = $this->isXHTML;
+     $fake->root = $fake->document;
+     if (! $charset)
+         $charset = $this->charset;
+     // Fully-qualified names: inside this namespace an unqualified
+     // `instanceof DOMNODE` / `DOMNODELIST` names a non-existent class and is
+     // therefore always false.
+     if ($source instanceof \DOMNode)
+         $source = array($source);
+     else if ($source instanceof \DOMNodeList)
+         $source = iterator_to_array($source);
+     if (is_array($source)) {
+         // dom nodes: load an empty fake document, then import into it
+         if (! $this->documentFragmentLoadMarkup($fake, $charset))
+             return false;
+         $nodes = $fake->import($source);
+         foreach($nodes as $node)
+             $fake->root->appendChild($node);
+     } else {
+         // string markup; report a failed load the same way as the node branch
+         if (! $this->documentFragmentLoadMarkup($fake, $charset, $source))
+             return false;
+     }
+     return $fake;
+ }
+ /**
+ *
+ * @param $document DOMDocumentWrapper
+ * @param $markup
+ * @return $document
+ */
+ private function documentFragmentLoadMarkup($fragment, $charset, $markup = null) {
+ // TODO error handling
+ // TODO copy doctype
+ // tempolary turn off
+ $fragment->isDocumentFragment = false;
+ if ($fragment->isXML) {
+ if ($fragment->isXHTML) {
+ // add FAKE element to set default namespace
+ $fragment->loadMarkupXML(''
+ .''
+ .''.$markup.'');
+ $fragment->root = $fragment->document->firstChild->nextSibling;
+ } else {
+ $fragment->loadMarkupXML(''.$markup.'');
+ $fragment->root = $fragment->document->firstChild;
+ }
+ } else {
+ $markup2 = phpQuery::$defaultDoctype.'';
+ $noBody = strpos($markup, 'loadMarkupHTML($markup2);
+ // TODO resolv body tag merging issue
+ $fragment->root = $noBody
+ ? $fragment->document->firstChild->nextSibling->firstChild->nextSibling
+ : $fragment->document->firstChild->nextSibling->firstChild->nextSibling;
+ }
+ if (! $fragment->root)
+ return false;
+ $fragment->isDocumentFragment = true;
+ return true;
+ }
+ /**
+  * Serialises a fragment wrapper via markup() and trims the result by
+  * substring offsets.
+  *
+  * NOTE(review): every strpos()/strrpos() needle below is an empty string,
+  * while the +6 offsets suggest six-character tags were meant; the literals
+  * look as if they lost their tag text — verify against the upstream source.
+  *
+  * @param DOMDocumentWrapper $fragment
+  * @return string
+  */
+ protected function documentFragmentToMarkup($fragment) {
+ phpQuery::debug('documentFragmentToMarkup');
+ // Temporarily clear the fragment flag; it is restored below.
+ $tmp = $fragment->isDocumentFragment;
+ $fragment->isDocumentFragment = false;
+ $markup = $fragment->markup();
+ if ($fragment->isXML) {
+ $markup = substr($markup, 0, strrpos($markup, ''));
+ if ($fragment->isXHTML) {
+ $markup = substr($markup, strpos($markup, '')+6);
+ }
+ } else {
+ $markup = substr($markup, strpos($markup, '')+6);
+ $markup = substr($markup, 0, strrpos($markup, ''));
+ }
+ $fragment->isDocumentFragment = $tmp;
+ if (phpQuery::$debug)
+ phpQuery::debug('documentFragmentToMarkup: '.substr($markup, 0, 150));
+ return $markup;
+ }
+ /**
+  * Return document markup, starting with optional $nodes as root.
+  *
+  * @param \DOMNode|\DOMNodeList|array|null $nodes       Nodes to serialise; null (or a
+  *                                                     single DOMDocument) means the whole document.
+  * @param bool                             $innerMarkup When true, serialise the children
+  *                                                     of each node instead of the node itself.
+  * @return string
+  */
+ public function markup($nodes = null, $innerMarkup = false) {
+     // Normalise to a plain array up front. An unqualified `instanceof
+     // DOMNODELIST` names a non-existent class inside this namespace, and
+     // count()/[0] must not be applied to a bare node.
+     if ($nodes instanceof \DOMNodeList)
+         $nodes = iterator_to_array($nodes);
+     else if (isset($nodes) && ! is_array($nodes))
+         $nodes = array($nodes);
+     if (isset($nodes) && count($nodes) == 1 && isset($nodes[0]) && $nodes[0] instanceof DOMDocument)
+         $nodes = null;
+     if (isset($nodes)) {
+         $markup = '';
+         if ($this->isDocumentFragment && ! $innerMarkup) {
+             // Replace the fragment root by its children. Building a new list
+             // avoids the array union operator, which silently dropped entries
+             // whose integer keys collided.
+             $expanded = array();
+             foreach($nodes as $node) {
+                 if ($node->isSameNode($this->root)) {
+                     foreach($node->childNodes as $child)
+                         $expanded[] = $child;
+                 } else {
+                     $expanded[] = $node;
+                 }
+             }
+             $nodes = $expanded;
+         }
+         if ($this->isXML && ! $innerMarkup) {
+             self::debug("Getting outerXML with charset '{$this->charset}'");
+             // we need outerXML, so we can benefit from
+             // $node param support in saveXML()
+             foreach($nodes as $node)
+                 $markup .= $this->document->saveXML($node);
+         } else {
+             $loop = array();
+             if ($innerMarkup)
+                 foreach($nodes as $node) {
+                     if ($node->childNodes)
+                         foreach($node->childNodes as $child)
+                             $loop[] = $child;
+                     else
+                         $loop[] = $node;
+                 }
+             else
+                 $loop = $nodes;
+             self::debug("Getting markup, moving selected nodes (".count($loop).") to new DocumentFragment");
+             $fake = $this->documentFragmentCreate($loop);
+             $markup = $this->documentFragmentToMarkup($fake);
+         }
+         if ($this->isXHTML) {
+             self::debug("Fixing XHTML");
+             $markup = self::markupFixXHTML($markup);
+         }
+         self::debug("Markup: ".substr($markup, 0, 250));
+         return $markup;
+     } else {
+         if ($this->isDocumentFragment) {
+             // documentFragment, html only...
+             self::debug("Getting markup, DocumentFragment detected");
+             $markup = $this->documentFragmentToMarkup($this);
+             // no need for markupFixXHTML, as it's done through the markup($nodes) path
+             return $markup;
+         } else {
+             self::debug("Getting markup (".($this->isXML?'XML':'HTML')."), final with charset '{$this->charset}'");
+             $markup = $this->isXML
+                 ? $this->document->saveXML()
+                 : $this->document->saveHTML();
+             if ($this->isXHTML) {
+                 self::debug("Fixing XHTML");
+                 $markup = self::markupFixXHTML($markup);
+             }
+             self::debug("Markup: ".substr($markup, 0, 250));
+             return $markup;
+         }
+     }
+ }
+ /**
+  * Runs expandEmptyTag() over the markup for script, select and textarea, in that order.
+  *
+  * @param string $markup
+  * @return string
+  */
+ protected static function markupFixXHTML($markup) {
+     foreach (array('script', 'select', 'textarea') as $tagName) {
+         $markup = self::expandEmptyTag($tagName, $markup);
+     }
+     return $markup;
+ }
+ // Forwards a debug message to phpQuery::debug().
+ public static function debug($text) {
+ phpQuery::debug($text);
+ }
+ /**
+  * Expands self-closed occurrences of a tag ("<tag .../>") into an explicit
+  * open/close pair ("<tag ...></tag>").
+  *
+  * Only occurrences written as "<tag " (tag name followed by a space) are considered.
+  *
+  * @param string $tag Tag name, e.g. 'script'.
+  * @param string $xml Markup to process.
+  * @return string Markup with the matching empty tags expanded.
+  * @author mjaque at ilkebenson dot com
+  * @link http://php.net/manual/en/domdocument.savehtml.php#81256
+  */
+ public static function expandEmptyTag($tag, $xml){
+     $offset = 0;
+     while ($offset < strlen($xml)) {
+         $pos = strpos($xml, "<$tag ", $offset);
+         // Compare against false: a match at offset 0 is still a match.
+         if ($pos === false)
+             break;
+         $closePos = strpos($xml, ">", $pos);
+         // Unterminated tag: stop instead of looping on the same match.
+         if ($closePos === false)
+             break;
+         if ($xml[$closePos-1] == "/") {
+             // Swap the trailing "/>" for ">" plus a proper closing tag.
+             $xml = substr_replace($xml, "></$tag>", $closePos-1, 2);
+         }
+         $offset = $closePos;
+     }
+     return $xml;
+ }
+}
+
+/**
+ * Event handling class.
+ *
+ * Handlers are stored on the nodes themselves (in an `eventHandlers` array
+ * keyed by event type) and the nodes are tracked in the owning document's
+ * `eventsNodes` list.
+ *
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ * @static
+ */
+abstract class phpQueryEvents {
+    /**
+     * Trigger a type of event on every matched element.
+     *
+     * Without $node, the event is only dispatched when it is registered as a
+     * global event for the document. With $node, handlers are run on the node
+     * and then on each ancestor until a handler returns false or bubbling is
+     * otherwise stopped.
+     *
+     * @param DOMNode|phpQueryObject|string $document
+     * @param string $type Event type, optionally "name.namespace".
+     * @param array $data Extra handler arguments; a leading DOMEvent is reused as the event.
+     * @param DOMNode|null $node Node to start from.
+     *
+     * @TODO exclusive events (with !)
+     * @TODO global events (test)
+     * @TODO support more than event in $type (space-separated)
+     */
+    public static function trigger($document, $type, $data = array(), $node = null) {
+        // trigger: function(type, data, elem, donative, extra) {
+        $documentID = phpQuery::getDocumentID($document);
+        $namespace = null;
+        if (strpos($type, '.') !== false)
+            list($name, $namespace) = explode('.', $type);
+        else
+            $name = $type;
+        if (! $node) {
+            if (self::issetGlobal($documentID, $type)) {
+                $pq = phpQuery::getDocument($documentID);
+                // TODO check add($pq->document)
+                $pq->find('*')->add($pq->document)
+                    ->trigger($type, $data);
+            }
+        } else {
+            if (isset($data[0]) && $data[0] instanceof DOMEvent) {
+                $event = $data[0];
+                $event->relatedTarget = $event->target;
+                $event->target = $node;
+                $data = array_slice($data, 1);
+            } else {
+                $event = new DOMEvent(array(
+                    'type' => $type,
+                    'target' => $node,
+                    'timeStamp' => time(),
+                ));
+            }
+            $i = 0;
+            while($node) {
+                // TODO whois
+                phpQuery::debug("Triggering ".($i?"bubbled ":'')."event '{$type}' on "
+                    ."node \n");//.phpQueryObject::whois($node)."\n");
+                $event->currentTarget = $node;
+                $eventNode = self::getNode($documentID, $node);
+                if (isset($eventNode->eventHandlers)) {
+                    foreach($eventNode->eventHandlers as $eventType => $handlers) {
+                        $eventNamespace = null;
+                        // Split the HANDLER's type, not the triggered one; testing
+                        // $type here broke matching whenever only one of the two
+                        // carried a namespace.
+                        if (strpos($eventType, '.') !== false)
+                            list($eventName, $eventNamespace) = explode('.', $eventType);
+                        else
+                            $eventName = $eventType;
+                        if ($name != $eventName)
+                            continue;
+                        if ($namespace && $eventNamespace && $namespace != $eventNamespace)
+                            continue;
+                        foreach($handlers as $handler) {
+                            phpQuery::debug("Calling event handler\n");
+                            $event->data = $handler['data']
+                                ? $handler['data']
+                                : null;
+                            $params = array_merge(array($event), $data);
+                            $return = phpQuery::callbackRun($handler['callback'], $params);
+                            // A handler returning false stops bubbling.
+                            if ($return === false) {
+                                $event->bubbles = false;
+                            }
+                        }
+                    }
+                }
+                // to bubble or not to bubble...
+                if (! $event->bubbles)
+                    break;
+                $node = $node->parentNode;
+                $i++;
+            }
+        }
+    }
+    /**
+     * Binds a handler to one or more events (like click) for each matched element.
+     * Can also bind custom events.
+     *
+     * @param DOMNode|phpQueryObject|string $document
+     * @param DOMNode $node Node the handler is attached to.
+     * @param string $type Event type.
+     * @param mixed $data Stored with the handler and exposed as $event->data.
+     * @param callable|null $callback
+     *
+     * @TODO support '!' (exclusive) events
+     * @TODO support more than event in $type (space-separated)
+     * @TODO support binding to global events
+     */
+    public static function add($document, $node, $type, $data, $callback = null) {
+        phpQuery::debug("Binding '$type' event");
+        $documentID = phpQuery::getDocumentID($document);
+        $eventNode = self::getNode($documentID, $node);
+        // First handler for this node: start tracking it.
+        if (! $eventNode)
+            $eventNode = self::setNode($documentID, $node);
+        if (!isset($eventNode->eventHandlers[$type]))
+            $eventNode->eventHandlers[$type] = array();
+        $eventNode->eventHandlers[$type][] = array(
+            'callback' => $callback,
+            'data' => $data,
+        );
+    }
+    /**
+     * Removes handlers bound to a node.
+     *
+     * With $callback only the matching handlers of $type are removed;
+     * without it, every handler of $type is removed.
+     *
+     * @param DOMNode|phpQueryObject|string $document
+     * @param DOMNode $node
+     * @param string|null $type
+     * @param callable|null $callback
+     *
+     * @TODO namespace events
+     * @TODO support more than event in $type (space-separated)
+     */
+    public static function remove($document, $node, $type = null, $callback = null) {
+        $documentID = phpQuery::getDocumentID($document);
+        $eventNode = self::getNode($documentID, $node);
+        if (is_object($eventNode) && isset($eventNode->eventHandlers[$type])) {
+            if ($callback) {
+                foreach($eventNode->eventHandlers[$type] as $k => $handler)
+                    if ($handler['callback'] == $callback)
+                        unset($eventNode->eventHandlers[$type][$k]);
+            } else {
+                unset($eventNode->eventHandlers[$type]);
+            }
+        }
+    }
+    /**
+     * Finds the tracked node that is the same node as $node.
+     *
+     * @return DOMNode|null Null when the node is not tracked.
+     */
+    protected static function getNode($documentID, $node) {
+        foreach(phpQuery::$documents[$documentID]->eventsNodes as $eventNode) {
+            if ($node->isSameNode($eventNode))
+                return $eventNode;
+        }
+        return null;
+    }
+    /**
+     * Starts tracking $node for the document and returns the stored entry.
+     */
+    protected static function setNode($documentID, $node) {
+        phpQuery::$documents[$documentID]->eventsNodes[] = $node;
+        return phpQuery::$documents[$documentID]->eventsNodes[
+            count(phpQuery::$documents[$documentID]->eventsNodes)-1
+        ];
+    }
+    /**
+     * Tells whether $type is listed in the document's global events.
+     *
+     * @return bool False as well when the document is unknown.
+     */
+    protected static function issetGlobal($documentID, $type) {
+        return isset(phpQuery::$documents[$documentID])
+            ? in_array($type, phpQuery::$documents[$documentID]->eventsGlobal)
+            : false;
+    }
+}
+
+
+// Contract for callbacks that can carry a name.
+interface ICallbackNamed {
+ // Implementations in this file return whether a non-empty name is set.
+ function hasName();
+ // Implementations in this file return the name prefixed with 'Callback: '.
+ function getName();
+}
+/**
+ * Callback class introduces currying-like pattern.
+ *
+ * Example:
+ * function foo($param1, $param2, $param3) {
+ *   var_dump($param1, $param2, $param3);
+ * }
+ * $fooCurried = new Callback('foo',
+ *   'param1 is now statically set',
+ *   new CallbackParam, new CallbackParam
+ * );
+ * phpQuery::callbackRun($fooCurried,
+ *   array('param2 value', 'param3 value')
+ * );
+ *
+ * Callback class is supported in all phpQuery methods which accepts callbacks.
+ *
+ * @link http://code.google.com/p/phpquery/wiki/Callbacks#Param_Structures
+ * @author Tobiasz Cudnik
+ *
+ * @TODO??? return fake forwarding function created via create_function
+ * @TODO honor paramStructure
+ */
+class Callback
+    implements ICallbackNamed {
+    /** The wrapped callable. */
+    public $callback = null;
+    /** Arguments given to the constructor after the callable. */
+    public $params = null;
+    protected $name;
+
+    /**
+     * Stores the callable and every further constructor argument.
+     *
+     * When $callback is itself a Callback nothing is stored (not implemented yet).
+     */
+    public function __construct($callback, $param1 = null, $param2 = null,
+            $param3 = null) {
+        $extraArguments = array_slice(func_get_args(), 1);
+        if ($callback instanceof Callback) {
+            // TODO implement recursion
+            return;
+        }
+        $this->callback = $callback;
+        $this->params = $extraArguments;
+    }
+    /** Returns the name prefixed with 'Callback: '. */
+    public function getName() {
+        return 'Callback: '.$this->name;
+    }
+    /** Tells whether a non-empty name is set. */
+    public function hasName() {
+        return !empty($this->name);
+    }
+    /** Sets the name; returns $this for chaining. */
+    public function setName($name) {
+        $this->name = $name;
+        return $this;
+    }
+    // TODO test me
+//    public function addParams() {
+//        $params = func_get_args();
+//        return new Callback($this->callback, $this->params+$params);
+//    }
+}
+/**
+ * Shorthand for a Callback whose callable is built from a parameter list
+ * and a code string.
+ *
+ * @author Tobiasz Cudnik
+ */
+class CallbackBody extends Callback {
+    /**
+     * @param string $paramList Parameter list source, e.g. '$a, $b'.
+     * @param string $code      Function body source.
+     *
+     * SECURITY NOTE(review): both strings are evaluated as PHP code (as they
+     * were with create_function()); never pass untrusted input.
+     */
+    public function __construct($paramList, $code, $param1 = null, $param2 = null,
+            $param3 = null) {
+        $params = func_get_args();
+        $params = array_slice($params, 2);
+        if (function_exists('create_function')) {
+            $this->callback = create_function($paramList, $code);
+        } else {
+            // create_function() no longer exists on PHP 8; build the
+            // equivalent anonymous function instead.
+            $this->callback = eval('return function('.$paramList.') {'.$code.'};');
+        }
+        $this->params = $params;
+    }
+}
+/**
+ * Callback type which on execution returns reference passed during creation.
+ *
+ * @author Tobiasz Cudnik
+ */
+class CallbackReturnReference extends Callback
+    implements ICallbackNamed {
+    protected $reference;
+    /**
+     * @param mixed       $reference Variable bound by reference.
+     * @param string|null $name      Optional name reported by getName()/hasName().
+     */
+    public function __construct(&$reference, $name = null){
+        $this->reference =& $reference;
+        // Keep the name, as CallbackReturnValue does; it used to be dropped,
+        // so hasName() could never be true.
+        $this->name = $name;
+        $this->callback = array($this, 'callback');
+    }
+    /** Returns the current value of the bound variable. */
+    public function callback() {
+        return $this->reference;
+    }
+    public function getName() {
+        return 'Callback: '.$this->name;
+    }
+    public function hasName() {
+        return isset($this->name) && $this->name;
+    }
+}
+/**
+ * Callback type which on execution returns value passed during creation.
+ *
+ * @author Tobiasz Cudnik
+ */
+class CallbackReturnValue extends Callback
+    implements ICallbackNamed {
+    protected $value;
+    protected $name;
+    /**
+     * @param mixed       $value Value handed back by callback().
+     * @param string|null $name  Optional name.
+     */
+    public function __construct($value, $name = null){
+        $this->callback = array($this, 'callback');
+        $this->name = $name;
+        $this->value =& $value;
+    }
+    /** Returns the stored value. */
+    public function callback() {
+        return $this->value;
+    }
+    /** Tells whether a non-empty name is set. */
+    public function hasName() {
+        return !empty($this->name);
+    }
+    /** Returns the name prefixed with 'Callback: '. */
+    public function getName() {
+        return 'Callback: '.$this->name;
+    }
+    /** String form is the same as getName(). */
+    public function __toString() {
+        return $this->getName();
+    }
+}
+/**
+ * CallbackParameterToReference can be used when we don't really want a callback,
+ * only parameter passed to it. CallbackParameterToReference takes first
+ * parameter's value and passes it to reference.
+ *
+ * @author Tobiasz Cudnik
+ */
+class CallbackParameterToReference extends Callback {
+ /**
+ * Binds $this->callback to the caller's variable by reference; the parent
+ * constructor is not called.
+ *
+ * NOTE(review): writing the parameter value into that variable is not done
+ * in this class — presumably by the code that runs the callback; verify.
+ *
+ * @param $reference
+ * @TODO implement $paramIndex;
+ * param index choose which callback param will be passed to reference
+ */
+ public function __construct(&$reference){
+ $this->callback =& $reference;
+ }
+}
+//class CallbackReference extends Callback {
+// /**
+// *
+// * @param $reference
+// * @param $paramIndex
+// * @todo implement $paramIndex; param index choose which callback param will be passed to reference
+// */
+// public function __construct(&$reference, $name = null){
+// $this->callback =& $reference;
+// }
+//}
+// Empty marker class; the Callback usage example in this file passes
+// `new CallbackParam` instances as placeholder arguments.
+class CallbackParam {}
+
+/**
+ * Class representing phpQuery objects.
+ *
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ * @method phpQueryObject clone() clone()
+ * @method phpQueryObject empty() empty()
+ * @method phpQueryObject next() next($selector = null)
+ * @method phpQueryObject prev() prev($selector = null)
+ * @property Int $length
+ */
+class phpQueryObject
+ implements \Iterator, \Countable, \ArrayAccess {
+ public $documentID = null;
+ /**
+ * DOMDocument class.
+ *
+ * @var DOMDocument
+ */
+ public $document = null;
+ public $charset = null;
+ /**
+ *
+ * @var DOMDocumentWrapper
+ */
+ public $documentWrapper = null;
+ /**
+ * XPath interface.
+ *
+ * @var DOMXPath
+ */
+ public $xpath = null;
+ /**
+ * Stack of selected elements.
+ * @TODO refactor to ->nodes
+ * @var array
+ */
+ public $elements = array();
+ /**
+ * @access private
+ */
+ protected $elementsBackup = array();
+ /**
+ * @access private
+ */
+ protected $previous = null;
+ /**
+ * @access private
+ * @TODO deprecate
+ */
+ protected $root = array();
+ /**
+ * Indicates whether the document is just a fragment (no <html> tag).
+ *
+ * Every document is really a full document, so even document fragments can
+ * be queried against <html>, but getDocument(id)->htmlOuter() will return
+ * only the contents of <body>.
+ *
+ * @var bool
+ */
+ public $documentFragment = true;
+ /**
+ * Iterator interface helper
+ * @access private
+ */
+ protected $elementsInterator = array();
+ /**
+ * Iterator interface helper
+ * @access private
+ */
+ protected $valid = false;
+ /**
+ * Iterator interface helper
+ * @access private
+ */
+ protected $current = null;
+ /**
+ * Binds this object to an already loaded document.
+ *
+ * The wrapper registered in phpQuery::$documents under the resolved ID is
+ * looked up, and its document, xpath, charset, fragment flag and root are
+ * bound to this object by reference. The element stack starts out holding
+ * only the document root.
+ *
+ * @param string|phpQueryObject $documentID Document ID, or another instance
+ * of this class whose document ID is reused.
+ * @throws Exception When no document is registered under the resolved ID.
+ */
+ public function __construct($documentID) {
+     if ($documentID instanceof self) {
+         $id = $documentID->getDocumentID();
+     } else {
+         $id = $documentID;
+     }
+     if (! isset(phpQuery::$documents[$id] )) {
+         throw new Exception("Document with ID '{$id}' isn't loaded. Use phpQuery::newDocument(\$html) or phpQuery::newDocumentFile(\$file) first.");
+     }
+     $this->documentID = $id;
+     $this->documentWrapper =& phpQuery::$documents[$id];
+     // everything below aliases wrapper state, so later wrapper changes stay visible here
+     $this->document =& $this->documentWrapper->document;
+     $this->xpath =& $this->documentWrapper->xpath;
+     $this->charset =& $this->documentWrapper->charset;
+     $this->documentFragment =& $this->documentWrapper->isDocumentFragment;
+     // TODO check $this->DOM->documentElement;
+     $this->root =& $this->documentWrapper->root;
+     $this->elements = array($this->root);
+ }
+ /**
+ * Magic getter: exposes the virtual "length" property and otherwise
+ * falls back to reading the property of the given name.
+ *
+ * @access private
+ * @param string $attr Property name.
+ * @return mixed size() for "length", the property value otherwise.
+ */
+ public function __get($attr) {
+     // FIXME doesnt work at all ?
+     if ($attr == 'length') {
+         return $this->size();
+     }
+     return $this->$attr;
+ }
+ /**
+ * Stores this object in $var (passed by reference).
+ * Handy for keeping a handle on an object in the middle of a call chain.
+ *
+ * @param phpQueryObject $var Receives this object.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function toReference(&$var) {
+     $var = $this;
+     return $var;
+ }
+ /**
+ * Gets or sets the "document is only a fragment" flag.
+ *
+ * Called without an argument (or with null) it returns the current flag.
+ * Called with any other value it stores that value and returns $this so
+ * calls can be chained; this now includes false, which the previous
+ * truthiness test silently treated as a read.
+ *
+ * @param bool|null $state New flag value, or null to read the flag.
+ * @return bool|phpQueryObject Current flag when reading, $this when writing.
+ */
+ public function documentFragment($state = null) {
+     if ($state === null) {
+         return $this->documentFragment;
+     }
+     // Entries of phpQuery::$documents are wrapper objects (the constructor
+     // reads ->isDocumentFragment from them), so the flag is written to that
+     // property instead of being indexed like an array. $this->documentFragment
+     // is bound by reference to the same property and follows automatically.
+     $this->documentWrapper->isDocumentFragment = $state;
+     return $this;
+ }
+ /**
+ * Tells whether a node counts as the document root: a DOMDocument, an
+ * element whose tag name is "html", or the node stored in $this->root.
+ *
+ * @access private
+ * @TODO documentWrapper
+ * @param DOMNode $node
+ * @return bool
+ */
+ protected function isRoot( $node) {
+     if ($node instanceof DOMDOCUMENT) {
+         return true;
+     }
+     if ($node instanceof DOMELEMENT && $node->tagName == 'html') {
+         return true;
+     }
+     return (bool) $this->root->isSameNode($node);
+ }
+ /**
+ * Tells whether the stack holds exactly one node and that node is the root.
+ *
+ * @access private
+ * @return bool
+ */
+ protected function stackIsRoot() {
+     if ($this->size() != 1) {
+         return false;
+     }
+     return (bool) $this->isRoot($this->elements[0]);
+ }
+ /**
+ * Resets the stack of this very object to the document root.
+ * NON JQUERY METHOD
+ *
+ * Watch out: no new instance is created; can be reverted with end().
+ *
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function toRoot() {
+     $rootOnly = array($this->root);
+     $this->elements = $rootOnly;
+     return $this;
+ }
+ /**
+ * Saves object's DocumentID to $documentID by reference.
+ *
+ * Example:
+ *   $myDocumentId;
+ *   phpQuery::newDocument($markup)
+ *     ->getDocumentIDRef($myDocumentId)
+ *     ->find('div')->...
+ *
+ * @param mixed $documentID Receives the document ID of this object.
+ * @see phpQuery::newDocument
+ * @see phpQuery::newDocumentFile
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function getDocumentIDRef(&$documentID) {
+ $documentID = $this->getDocumentID();
+ return $this;
+ }
+ /**
+ * Returns whatever phpQuery::getDocument() yields for this object's
+ * document ID (described upstream as an object with its stack set to the
+ * document root).
+ *
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function getDocument() {
+ return phpQuery::getDocument($this->getDocumentID());
+ }
+ /**
+ * Returns the underlying DOM document bound in the constructor.
+ *
+ * @return DOMDocument
+ */
+ public function getDOMDocument() {
+ return $this->document;
+ }
+ /**
+ * Get object's Document ID.
+ *
+ * @return mixed The ID stored by the constructor (a key of phpQuery::$documents).
+ */
+ public function getDocumentID() {
+ return $this->documentID;
+ }
+ /**
+ * Unloads whole document from memory by passing this object's document ID
+ * to phpQuery::unloadDocuments().
+ * CAUTION! No further operations will be possible on this document.
+ * All objects referring to it will be useless.
+ *
+ * @return void Nothing is returned, so this call cannot be chained.
+ */
+ public function unloadDocument() {
+ phpQuery::unloadDocuments($this->getDocumentID());
+ }
+ /**
+ * Returns the isHTML flag of the document wrapper.
+ *
+ * @return bool
+ */
+ public function isHTML() {
+ return $this->documentWrapper->isHTML;
+ }
+ /**
+ * Returns the isXHTML flag of the document wrapper.
+ *
+ * @return bool
+ */
+ public function isXHTML() {
+ return $this->documentWrapper->isXHTML;
+ }
+ /**
+ * Returns the isXML flag of the document wrapper.
+ *
+ * @return bool
+ */
+ public function isXML() {
+ return $this->documentWrapper->isXML;
+ }
+ /**
+ * Serializes the form controls on the stack by passing the result of
+ * serializeArray() to phpQuery::param().
+ *
+ * @link http://docs.jquery.com/Ajax/serialize
+ * @return string
+ */
+ public function serialize() {
+ return phpQuery::param($this->serializeArray());
+ }
+ /**
+ * Collects the name/value pairs of the form controls on the stack.
+ *
+ * Controls are the input, select and textarea elements on the stack or
+ * inside forms/controls on the stack. Skipped are: disabled controls,
+ * controls without a name attribute, and checkboxes or radio buttons that
+ * are not checked (unchecked radio buttons used to be serialized).
+ *
+ * jQuery difference: when $submit is given, submit inputs are included
+ * only if they are that element (DOMElement) or carry that name (string).
+ *
+ * @link http://docs.jquery.com/Ajax/serializeArray
+ * @param DOMElement|string|null $submit Submit control that triggered the
+ * submission, or its name.
+ * @return array List of array('name' => ..., 'value' => ...) entries.
+ */
+ public function serializeArray($submit = null) {
+     $source = $this->filter('form, input, select, textarea')
+         ->find('input, select, textarea')
+         ->andSelf()
+         ->not('form');
+     $return = array();
+     foreach($source as $input) {
+         $input = phpQuery::pq($input);
+         if ($input->is('[disabled]'))
+             continue;
+         if (!$input->is('[name]'))
+             continue;
+         // checkboxes and radio buttons only count when checked
+         $isToggle = $input->is('[type=checkbox]') || $input->is('[type=radio]');
+         if ($isToggle && !$input->is('[checked]'))
+             continue;
+         // jquery diff
+         if ($submit && $input->is('[type=submit]')) {
+             if ($submit instanceof DOMELEMENT && ! $input->elements[0]->isSameNode($submit))
+                 continue;
+             else if (is_string($submit) && $input->attr('name') != $submit)
+                 continue;
+         }
+         $return[] = array(
+             'name' => $input->attr('name'),
+             'value' => $input->val(),
+         );
+     }
+     return $return;
+ }
+ /**
+ * Prints a debug dump of $in when phpQuery::$debug is enabled.
+ *
+ * The dump is wrapped in <pre>...</pre>. The wrappers had been stripped
+ * from this copy, leaving an empty print and a string literal broken over
+ * two lines, the second of which carried no "+" marker.
+ *
+ * @access private
+ * @param mixed $in Value to dump with print_r().
+ * @return void
+ */
+ protected function debug($in) {
+     if (! phpQuery::$debug )
+         return;
+     print('<pre>');
+     print_r($in);
+     print("</pre>\n");
+ }
+ /**
+ * Tells whether an attribute name ends with one of the operator
+ * characters ^, * or $ (as in [attr^=val], [attr*=val], [attr$=val]).
+ *
+ * The last character is taken with substr() so the check works on bytes
+ * consistently; the previous code indexed the byte string with a
+ * character count from mb_strlen(), which pointed at the wrong byte for
+ * multibyte names and at offset -1 for an empty string.
+ *
+ * @access private
+ * @param string $pattern Attribute name, possibly with a trailing operator.
+ * @return bool
+ */
+ protected function isRegexp($pattern) {
+     $last = substr((string) $pattern, -1);
+     return in_array($last, array('^', '*', '$'), true);
+ }
+ /**
+ * Checks whether $char contains a word character (\w).
+ *
+ * Uses mb_eregi() when the mbstring extension is loaded and
+ * phpQuery::$mbstringSupport is on, preg_match() otherwise; the raw result
+ * of that call is returned, so treat it as truthy/falsy.
+ *
+ * @param string $char
+ * @return bool|int
+ * @todo rewrite me to charcode range ! ;)
+ * @access private
+ */
+ protected function isChar($char) {
+     $useMbstring = extension_loaded('mbstring') && phpQuery::$mbstringSupport;
+     if ($useMbstring) {
+         return mb_eregi('\w', $char);
+     }
+     return preg_match('@\w@', $char);
+ }
+ /**
+ * Splits a CSS selector string into token lists.
+ *
+ * The result is an array with one entry per comma-separated selector; each
+ * entry is a flat list of string tokens: tag names, '#id', '.class' (chained
+ * classes stay in one token, e.g. '.a.b'), '[attr...]', ':pseudo(...)',
+ * '~sel', '+sel' and the combinators '>' and ' '.
+ * Afterwards every non-empty list gets a leading ' ' unless it starts with
+ * '>', and a '*' is put before a leading pseudo-class token.
+ * Characters matching none of the branches are skipped silently.
+ *
+ * @access private
+ * @param string $query CSS selector(s).
+ * @return array List of token lists; array(array()) for an empty query.
+ */
+ protected function parseSelector($query) {
+ // clean spaces
+ // TODO include this inside parsing ?
+ // inner replace drops whitespace around '>', '+' and '~'; outer one collapses whitespace runs
+ $query = trim(
+ preg_replace('@\s+@', ' ',
+ preg_replace('@\s*(>|\\+|~)\s*@', '\\1', $query)
+ )
+ );
+ $queries = array(array());
+ if (! $query)
+ return $queries;
+ // $return always aliases the token list of the selector being parsed
+ $return =& $queries[0];
+ $specialChars = array('>',' ');
+// $specialCharsMapping = array('/' => '>');
+ $specialCharsMapping = array();
+ $strlen = mb_strlen($query);
+ $classChars = array('.', '-');
+ $pseudoChars = array('-');
+ $tagChars = array('*', '|', '-');
+ // split multibyte string
+ // http://code.google.com/p/phpquery/issues/detail?id=76
+ $_query = array();
+ for ($i=0; $i<$strlen; $i++)
+ $_query[] = mb_substr($query, $i, 1);
+ $query = $_query;
+ // from here on $query is an array of characters and $i the read position
+ // it works, but i dont like it...
+ $i = 0;
+ while( $i < $strlen) {
+ $c = $query[$i];
+ $tmp = '';
+ // TAG
+ if ($this->isChar($c) || in_array($c, $tagChars)) {
+ while(isset($query[$i])
+ && ($this->isChar($query[$i]) || in_array($query[$i], $tagChars))) {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $return[] = $tmp;
+ // IDs
+ } else if ( $c == '#') {
+ $i++;
+ while( isset($query[$i]) && ($this->isChar($query[$i]) || $query[$i] == '-')) {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $return[] = '#'.$tmp;
+ // SPECIAL CHARS
+ } else if (in_array($c, $specialChars)) {
+ $return[] = $c;
+ $i++;
+ // MAPPED SPECIAL MULTICHARS
+// } else if ( $c.$query[$i+1] == '//') {
+// $return[] = ' ';
+// $i = $i+2;
+ // MAPPED SPECIAL CHARS
+ } else if ( isset($specialCharsMapping[$c])) {
+ $return[] = $specialCharsMapping[$c];
+ $i++;
+ // COMMA
+ } else if ( $c == ',') {
+ // start a new token list and skip the spaces following the comma
+ $queries[] = array();
+ $return =& $queries[ count($queries)-1 ];
+ $i++;
+ while( isset($query[$i]) && $query[$i] == ' ')
+ $i++;
+ // CLASSES
+ } else if ($c == '.') {
+ // '.' is itself part of $classChars, so chained classes end up in one token
+ while( isset($query[$i]) && ($this->isChar($query[$i]) || in_array($query[$i], $classChars))) {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $return[] = $tmp;
+ // ~ General Sibling Selector
+ } else if ($c == '~') {
+ $spaceAllowed = true;
+ $tmp .= $query[$i++];
+ while( isset($query[$i])
+ && ($this->isChar($query[$i])
+ || in_array($query[$i], $classChars)
+ || $query[$i] == '*'
+ || ($query[$i] == ' ' && $spaceAllowed)
+ )) {
+ if ($query[$i] != ' ')
+ $spaceAllowed = false;
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $return[] = $tmp;
+ // + Adjacent sibling selectors
+ } else if ($c == '+') {
+ $spaceAllowed = true;
+ $tmp .= $query[$i++];
+ while( isset($query[$i])
+ && ($this->isChar($query[$i])
+ || in_array($query[$i], $classChars)
+ || $query[$i] == '*'
+ || ($spaceAllowed && $query[$i] == ' ')
+ )) {
+ if ($query[$i] != ' ')
+ $spaceAllowed = false;
+ $tmp .= $query[$i];
+ $i++;
+ }
+ $return[] = $tmp;
+ // ATTRS
+ } else if ($c == '[') {
+ // $stack counts open brackets so nested [ ] stay inside one token
+ $stack = 1;
+ $tmp .= $c;
+ while( isset($query[++$i])) {
+ $tmp .= $query[$i];
+ if ( $query[$i] == '[') {
+ $stack++;
+ } else if ( $query[$i] == ']') {
+ $stack--;
+ if (! $stack )
+ break;
+ }
+ }
+ $return[] = $tmp;
+ $i++;
+ // PSEUDO CLASSES
+ } else if ($c == ':') {
+ $stack = 1;
+ $tmp .= $query[$i++];
+ while( isset($query[$i]) && ($this->isChar($query[$i]) || in_array($query[$i], $pseudoChars))) {
+ $tmp .= $query[$i];
+ $i++;
+ }
+ // with arguments ?
+ if ( isset($query[$i]) && $query[$i] == '(') {
+ // same bracket counting as for attributes, here for parentheses
+ $tmp .= $query[$i];
+ $stack = 1;
+ while( isset($query[++$i])) {
+ $tmp .= $query[$i];
+ if ( $query[$i] == '(') {
+ $stack++;
+ } else if ( $query[$i] == ')') {
+ $stack--;
+ if (! $stack )
+ break;
+ }
+ }
+ $return[] = $tmp;
+ $i++;
+ } else {
+ $return[] = $tmp;
+ }
+ } else {
+ // unrecognized character: skipped
+ $i++;
+ }
+ }
+ // $q is a copy, so the '>' test below still sees the original first token
+ foreach($queries as $k => $q) {
+ if (isset($q[0])) {
+ if (isset($q[0][0]) && $q[0][0] == ':')
+ array_unshift($queries[$k], '*');
+ if ($q[0] != '>')
+ array_unshift($queries[$k], ' ');
+ }
+ }
+ return $queries;
+ }
+ /**
+ * Return matched DOM nodes, optionally passed through callbacks.
+ *
+ * Every argument after $index is treated as a callback and run through
+ * phpQuery::callbackRun(): per node when the result is an array, once on
+ * the single node otherwise.
+ *
+ * @param int $index Stack position; omit (null) to get the whole stack.
+ * @return array|DOMElement Single node (null if the index does not exist)
+ * or array of nodes.
+ */
+ public function get($index = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+     if (isset($index)) {
+         $return = isset($this->elements[$index]) ? $this->elements[$index] : null;
+     } else {
+         $return = $this->elements;
+     }
+     // pass through callbacks
+     $callbacks = array_slice(func_get_args(), 1);
+     foreach($callbacks as $callback) {
+         if (! is_array($return)) {
+             $return = phpQuery::callbackRun($callback, array($return));
+             continue;
+         }
+         foreach($return as $k => $v)
+             $return[$k] = phpQuery::callbackRun($callback, array($v));
+     }
+     return $return;
+ }
+ /**
+ * Returns the text of matched nodes.
+ * jQuery difference.
+ *
+ * With an index the text of that single node is returned; without one,
+ * an array with the text of every node on the stack. Every argument after
+ * $index is a callback that receives the whole result (string or array)
+ * via phpQuery::callbackRun().
+ *
+ * Index 0 is honoured: the previous truthiness test treated it as
+ * "no index" and returned the array for all nodes.
+ *
+ * @param int $index Stack position, or null for all nodes.
+ * @return array|string Returns string if $index != null
+ * @todo return only arrays ?
+ * @todo maybe other name...
+ */
+ public function getString($index = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+     if (isset($index)) {
+         $return = $this->eq($index)->text();
+     } else {
+         $return = array();
+         for($i = 0; $i < $this->size(); $i++) {
+             $return[] = $this->eq($i)->text();
+         }
+     }
+     // pass through callbacks
+     $args = func_get_args();
+     $args = array_slice($args, 1);
+     foreach($args as $callback) {
+         $return = phpQuery::callbackRun($callback, array($return));
+     }
+     return $return;
+ }
+ /**
+ * Returns the text of matched nodes, applying callbacks per string.
+ * jQuery difference.
+ *
+ * With an index the text of that single node is returned; without one,
+ * an array with the text of every node on the stack. Every argument after
+ * $index is a callback run through phpQuery::callbackRun(): once per
+ * string when the result is an array, once on the single string otherwise.
+ *
+ * The callback list is now built on both paths (it used to be undefined
+ * when an index was given) and index 0 is honoured.
+ *
+ * @param int $index Stack position, or null for all nodes.
+ * @return array|string Returns string if $index != null
+ * @todo return only arrays ?
+ * @todo maybe other name...
+ */
+ public function getStrings($index = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+     if (isset($index)) {
+         $return = $this->eq($index)->text();
+     } else {
+         $return = array();
+         for($i = 0; $i < $this->size(); $i++) {
+             $return[] = $this->eq($i)->text();
+         }
+     }
+     // pass through callbacks
+     $args = func_get_args();
+     $args = array_slice($args, 1);
+     foreach($args as $callback) {
+         if (is_array($return))
+             foreach($return as $k => $v)
+                 $return[$k] = phpQuery::callbackRun($callback, array($v));
+         else
+             $return = phpQuery::callbackRun($callback, array($return));
+     }
+     return $return;
+ }
+ /**
+ * Returns new instance of actual class, linked to this one via ->previous.
+ *
+ * @param array|string|null $newStack Stack for the new instance:
+ * - null: the new instance takes the current stack, and this object gets
+ *   its backed-up stack back (if there is one);
+ * - string: treated as a selector and resolved with phpQuery::pq();
+ * - anything else: used as the stack as is.
+ * @return phpQueryObject
+ */
+ public function newInstance($newStack = null) {
+     $class = get_class($this);
+     // support inheritance by passing old object to overloaded constructor
+     if ($class != 'phpQuery') {
+         $new = new $class($this, $this->getDocumentID());
+     } else {
+         $new = new phpQueryObject($this->getDocumentID());
+     }
+     $new->previous = $this;
+     if (is_string($newStack)) {
+         $new->elements = phpQuery::pq($newStack, $this->getDocumentID())->stack();
+     } else if (! is_null($newStack)) {
+         $new->elements = $newStack;
+     } else {
+         $new->elements = $this->elements;
+         if ($this->elementsBackup)
+             $this->elements = $this->elementsBackup;
+     }
+     return $new;
+ }
+
+ /**
+ * Matches a class selector against a node.
+ *
+ * $class is a selector token such as ".foo" or ".foo.bar"; the node
+ * matches when its class attribute contains every listed class name.
+ * The attribute is split on any whitespace (tabs, newlines and repeated
+ * spaces used to produce bogus entries), and a mismatch in the multi-class
+ * case now returns false explicitly instead of falling off the end.
+ *
+ * In the future, when PHP will support XSL 2.0, then we would do that this way:
+ * contains(tokenize(@class, '\s'), "something")
+ *
+ * @param string $class Class selector token with leading dot.
+ * @param DOMElement $node Node to test.
+ * @return boolean
+ * @access private
+ */
+ protected function matchClasses($class, $node) {
+     // strip the leading dot, then split chained classes; empty names are ignored
+     $wanted = array_filter(explode('.', substr($class, 1)), 'strlen');
+     $present = preg_split('/\s+/', $node->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
+     return ! array_diff($wanted, $present);
+ }
+
+ /**
+ * Runs an XPath query relative to every node of the current stack and
+ * replaces the stack with the nodes found.
+ *
+ * Only stack nodes of type 1, 9 and 13 are used (see the stack() call).
+ * For each of them the query is the node's own XPath (getNodeXpath())
+ * followed by $XQuery. When $compare is given, a found node is kept only
+ * if $this->$compare($selector, $node) returns a truthy value; debug
+ * output is switched off while that comparison runs.
+ *
+ * @access private
+ * @param string $XQuery XPath fragment appended to each node's XPath.
+ * @param string|null $selector Passed as first argument to $compare.
+ * @param string|null $compare Name of a method of this object used as filter.
+ * @return false|null False when $compare names no existing method;
+ * otherwise nothing is returned and $this->elements holds the result.
+ */
+ protected function runQuery($XQuery, $selector = null, $compare = null) {
+ if ($compare && ! method_exists($this, $compare))
+ return false;
+ $stack = array();
+ if (! $this->elements)
+ $this->debug('Stack empty, skipping...');
+// var_dump($this->elements[0]->nodeType);
+ // element, document
+ foreach($this->stack(array(1, 9, 13)) as $k => $stackNode) {
+ $detachAfter = false;
+ // to work on detached nodes we need temporary place them somewhere
+ // thats because context xpath queries sucks ;]
+ // walk up the ancestors; the first one without a parent that is not the
+ // root gets appended to the root for the duration of the query
+ $testNode = $stackNode;
+ while ($testNode) {
+ if (! $testNode->parentNode && ! $this->isRoot($testNode)) {
+ $this->root->appendChild($testNode);
+ $detachAfter = $testNode;
+ break;
+ }
+ $testNode = isset($testNode->parentNode)
+ ? $testNode->parentNode
+ : null;
+ }
+ // XXX tmp ?
+ $xpath = $this->documentWrapper->isXHTML
+ ? $this->getNodeXpath($stackNode, 'html')
+ : $this->getNodeXpath($stackNode);
+ // FIXME pseudoclasses-only query, support XML
+ $query = $XQuery == '//' && $xpath == '/html[1]'
+ ? '//*'
+ : $xpath.$XQuery;
+ $this->debug("XPATH: {$query}");
+ // run query, get elements
+ $nodes = $this->xpath->query($query);
+ $this->debug("QUERY FETCHED");
+ if (! $nodes->length )
+ $this->debug('Nothing found');
+ $debug = array();
+ foreach($nodes as $node) {
+ $matched = false;
+ if ( $compare) {
+ phpQuery::$debug ?
+ $this->debug("Found: ".$this->whois( $node ).", comparing with {$compare}()")
+ : null;
+ // silence debug output while the comparison method runs, then restore it
+ $phpQueryDebug = phpQuery::$debug;
+ phpQuery::$debug = false;
+ // TODO ??? use phpQuery::callbackRun()
+ if (call_user_func_array(array($this, $compare), array($selector, $node)))
+ $matched = true;
+ phpQuery::$debug = $phpQueryDebug;
+ } else {
+ $matched = true;
+ }
+ if ( $matched) {
+ if (phpQuery::$debug)
+ $debug[] = $this->whois( $node );
+ $stack[] = $node;
+ }
+ }
+ if (phpQuery::$debug) {
+ $this->debug("Matched ".count($debug).": ".implode(', ', $debug));
+ }
+ // undo the temporary attachment made above
+ if ($detachAfter)
+ $this->root->removeChild($detachAfter);
+ }
+ $this->elements = $stack;
+ }
+
+ /**
+ * Finds nodes matching CSS selector(s) below the current stack (or below
+ * $context) by translating the selector tokens to XPath.
+ *
+ * The selector is tokenized by parseSelector(). Tag, id and plain
+ * attribute tokens are accumulated into one XPath expression; class,
+ * attribute-operator (^= *= $=), sibling (~ +) and pseudo-class tokens
+ * force the accumulated expression to run and are then applied to the
+ * intermediate result. Results of comma-separated selectors are merged
+ * without duplicates.
+ *
+ * Attribute tokens are split at the first "=" only, so values that
+ * themselves contain "=" (e.g. [href="a?b=c"]) are no longer truncated.
+ *
+ * @param string $selectors CSS selector(s).
+ * @param DOMElement|array|phpQueryObject|null $context Optional node(s) to
+ * search in instead of the current stack.
+ * @param bool $noHistory When true the current stack is not backed up for end().
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function find($selectors, $context = null, $noHistory = false) {
+     if (!$noHistory)
+         // backup last stack /for end()/
+         $this->elementsBackup = $this->elements;
+     // allow to define context
+     // TODO combine code below with phpQuery::pq() context guessing code
+     // as generic function
+     if ($context) {
+         if (! is_array($context) && $context instanceof DOMELEMENT)
+             $this->elements = array($context);
+         else if (is_array($context)) {
+             $this->elements = array();
+             foreach ($context as $c)
+                 if ($c instanceof DOMELEMENT)
+                     $this->elements[] = $c;
+         } else if ( $context instanceof self )
+             $this->elements = $context->elements;
+     }
+     $queries = $this->parseSelector($selectors);
+     $this->debug(array('FIND', $selectors, $queries));
+     $XQuery = '';
+     // remember stack state because of multi-queries
+     $oldStack = $this->elements;
+     // here we will be keeping found elements
+     $stack = array();
+     foreach($queries as $selector) {
+         $this->elements = $oldStack;
+         // true while the previous token was a combinator ('>' or ' '), in
+         // which case id/attribute/class tests need an explicit '*' node test
+         $delimiterBefore = false;
+         foreach($selector as $s) {
+             // TAG
+             $isTag = extension_loaded('mbstring') && phpQuery::$mbstringSupport
+                 ? mb_ereg_match('^[\w|\||-]+$', $s) || $s == '*'
+                 : preg_match('@^[\w|\||-]+$@', $s) || $s == '*';
+             if ($isTag) {
+                 if ($this->isXML()) {
+                     // namespace support
+                     if (mb_strpos($s, '|') !== false) {
+                         $ns = $tag = null;
+                         list($ns, $tag) = explode('|', $s);
+                         $XQuery .= "$ns:$tag";
+                     } else if ($s == '*') {
+                         $XQuery .= "*";
+                     } else {
+                         $XQuery .= "*[local-name()='$s']";
+                     }
+                 } else {
+                     $XQuery .= $s;
+                 }
+             // ID
+             } else if ($s[0] == '#') {
+                 if ($delimiterBefore)
+                     $XQuery .= '*';
+                 $XQuery .= "[@id='".substr($s, 1)."']";
+             // ATTRIBUTES
+             } else if ($s[0] == '[') {
+                 if ($delimiterBefore)
+                     $XQuery .= '*';
+                 // strip side brackets
+                 $attr = trim($s, '][');
+                 $execute = false;
+                 // attr with specifed value
+                 if (mb_strpos($s, '=')) {
+                     $value = null;
+                     // split at the first '=' only; the value may contain more of them
+                     list($attr, $value) = explode('=', $attr, 2);
+                     $value = trim($value, "'\"");
+                     if ($this->isRegexp($attr)) {
+                         // cut regexp character
+                         $attr = substr($attr, 0, -1);
+                         $execute = true;
+                         $XQuery .= "[@{$attr}]";
+                     } else {
+                         $XQuery .= "[@{$attr}='{$value}']";
+                     }
+                 // attr without specified value
+                 } else {
+                     $XQuery .= "[@{$attr}]";
+                 }
+                 if ($execute) {
+                     // operator attributes are narrowed afterwards through is()
+                     $this->runQuery($XQuery, $s, 'is');
+                     $XQuery = '';
+                     if (! $this->length())
+                         break;
+                 }
+             // CLASSES
+             } else if ($s[0] == '.') {
+                 // TODO use return $this->find("./self::*[contains(concat(\" \",@class,\" \"), \" $class \")]");
+                 // thx wizDom ;)
+                 if ($delimiterBefore)
+                     $XQuery .= '*';
+                 $XQuery .= '[@class]';
+                 $this->runQuery($XQuery, $s, 'matchClasses');
+                 $XQuery = '';
+                 if (! $this->length() )
+                     break;
+             // ~ General Sibling Selector
+             } else if ($s[0] == '~') {
+                 $this->runQuery($XQuery);
+                 $XQuery = '';
+                 $this->elements = $this
+                     ->siblings(
+                         substr($s, 1)
+                     )->elements;
+                 if (! $this->length() )
+                     break;
+             // + Adjacent sibling selectors
+             } else if ($s[0] == '+') {
+                 // TODO /following-sibling::
+                 $this->runQuery($XQuery);
+                 $XQuery = '';
+                 $subSelector = substr($s, 1);
+                 $subElements = $this->elements;
+                 $this->elements = array();
+                 foreach($subElements as $node) {
+                     // search first DOMElement sibling
+                     $test = $node->nextSibling;
+                     while($test && ! ($test instanceof DOMELEMENT))
+                         $test = $test->nextSibling;
+                     if ($test && $this->is($subSelector, $test))
+                         $this->elements[] = $test;
+                 }
+                 if (! $this->length() )
+                     break;
+             // PSEUDO CLASSES
+             } else if ($s[0] == ':') {
+                 // TODO optimization for :first :last
+                 if ($XQuery) {
+                     $this->runQuery($XQuery);
+                     $XQuery = '';
+                 }
+                 if (! $this->length())
+                     break;
+                 $this->pseudoClasses($s);
+                 if (! $this->length())
+                     break;
+             // DIRECT DESCENDANDS
+             } else if ($s == '>') {
+                 $XQuery .= '/';
+                 $delimiterBefore = 2;
+             // ALL DESCENDANDS
+             } else if ($s == ' ') {
+                 $XQuery .= '//';
+                 $delimiterBefore = 2;
+             // ERRORS
+             } else {
+                 phpQuery::debug("Unrecognized token '$s'");
+             }
+             // 2 marks "combinator seen in this iteration"; anything else resets the flag
+             $delimiterBefore = $delimiterBefore === 2;
+         }
+         // run query if any
+         if ($XQuery && $XQuery != '//') {
+             $this->runQuery($XQuery);
+             $XQuery = '';
+         }
+         foreach($this->elements as $node)
+             if (! $this->elementsContainsNode($node, $stack))
+                 $stack[] = $node;
+     }
+     $this->elements = $stack;
+     return $this->newInstance();
+ }
+ /**
+ * Narrows the current element stack according to one pseudo-class token
+ * such as ":first", ":eq(2)" or ":nth-child(2n+1)".
+ *
+ * The stack ($this->elements) is modified in place; unknown pseudo-classes
+ * are reported through debug() and leave the stack untouched.
+ *
+ * Changes against the previous implementation:
+ * - anonymous functions replace create_function(), and no "{}" string
+ *   offsets are used (both were removed in PHP 8);
+ * - :enabled really drops disabled nodes (it tested an object that was
+ *   always truthy, so every node passed);
+ * - :lt(n) keeps indexes below n (it kept n+1 elements);
+ * - :nth-child(an+b) accepts multi-digit and signed a/b and only matches
+ *   indexes reachable with a non-negative n.
+ *
+ * @todo create API for classes with pseudoselectors
+ * @access private
+ * @param string $class Pseudo-class token including the leading colon.
+ * @return void
+ */
+ protected function pseudoClasses($class) {
+     // TODO clean args parsing ?
+     $class = ltrim($class, ':');
+     // stays null for pseudo-classes written without "(...)"
+     $args = null;
+     $haveArgs = mb_strpos($class, '(');
+     if ($haveArgs !== false) {
+         $args = substr($class, $haveArgs+1, -1);
+         $class = substr($class, 0, $haveArgs);
+     }
+     switch($class) {
+         case 'even':
+         case 'odd':
+             $stack = array();
+             foreach($this->elements as $i => $node) {
+                 if ($class == 'even' && ($i%2) == 0)
+                     $stack[] = $node;
+                 else if ( $class == 'odd' && $i % 2 )
+                     $stack[] = $node;
+             }
+             $this->elements = $stack;
+             break;
+         case 'eq':
+             $k = intval($args);
+             $this->elements = isset( $this->elements[$k] )
+                 ? array( $this->elements[$k] )
+                 : array();
+             break;
+         case 'gt':
+             $this->elements = array_slice($this->elements, intval($args)+1);
+             break;
+         case 'lt':
+             // indexes strictly below the argument
+             $this->elements = array_slice($this->elements, 0, intval($args));
+             break;
+         case 'first':
+             if (isset($this->elements[0]))
+                 $this->elements = array($this->elements[0]);
+             break;
+         case 'last':
+             if ($this->elements)
+                 $this->elements = array($this->elements[count($this->elements)-1]);
+             break;
+         case 'contains':
+             $text = trim($args, "\"'");
+             $stack = array();
+             foreach($this->elements as $node) {
+                 if (mb_stripos($node->textContent, $text) === false)
+                     continue;
+                 $stack[] = $node;
+             }
+             $this->elements = $stack;
+             break;
+         case 'not':
+             $selector = self::unQuote($args);
+             $this->elements = $this->not($selector)->stack();
+             break;
+         case 'slice':
+             // TODO jQuery difference ?
+             $args = explode(',',
+                 str_replace(', ', ',', trim($args, "\"'"))
+             );
+             $start = $args[0];
+             $end = isset($args[1])
+                 ? $args[1]
+                 : null;
+             // array_slice() wants a length, the selector gives an end index
+             if ($end > 0)
+                 $end = $end-$start;
+             $this->elements = array_slice($this->elements, $start, $end);
+             break;
+         case 'has':
+             $selector = trim($args, "\"'");
+             $stack = array();
+             foreach($this->stack(1) as $el) {
+                 if ($this->find($selector, $el, true)->length)
+                     $stack[] = $el;
+             }
+             $this->elements = $stack;
+             break;
+         case 'submit':
+         case 'reset':
+             $this->elements = phpQuery::merge(
+                 $this->map(array($this, 'is'),
+                     "input[type=$class]", new CallbackParam()
+                 ),
+                 $this->map(array($this, 'is'),
+                     "button[type=$class]", new CallbackParam()
+                 )
+             );
+             break;
+         case 'input':
+             $this->elements = $this->map(
+                 array($this, 'is'),
+                 'input', new CallbackParam()
+             )->elements;
+             break;
+         case 'password':
+         case 'checkbox':
+         case 'radio':
+         case 'hidden':
+         case 'image':
+         case 'file':
+             $this->elements = $this->map(
+                 array($this, 'is'),
+                 "input[type=$class]", new CallbackParam()
+             )->elements;
+             break;
+         case 'parent':
+             // elements that have child nodes
+             $this->elements = $this->map(
+                 function($node) {
+                     return $node instanceof DOMELEMENT && $node->childNodes->length
+                         ? $node : null;
+                 }
+             )->elements;
+             break;
+         case 'empty':
+             // everything except elements that have child nodes
+             $this->elements = $this->map(
+                 function($node) {
+                     return $node instanceof DOMELEMENT && $node->childNodes->length
+                         ? null : $node;
+                 }
+             )->elements;
+             break;
+         case 'disabled':
+         case 'selected':
+         case 'checked':
+             $this->elements = $this->map(
+                 array($this, 'is'),
+                 "[$class]", new CallbackParam()
+             )->elements;
+             break;
+         case 'enabled':
+             $this->elements = $this->map(
+                 function($node) {
+                     return pq($node)->is(":disabled") ? null : $node;
+                 }
+             )->elements;
+             break;
+         case 'header':
+             $this->elements = $this->map(
+                 function($node) {
+                     $isHeader = isset($node->tagName) && in_array($node->tagName, array(
+                         "h1", "h2", "h3", "h4", "h5", "h6", "h7"
+                     ));
+                     return $isHeader
+                         ? $node
+                         : null;
+                 }
+             )->elements;
+             break;
+         case 'only-child':
+             $this->elements = $this->map(
+                 function($node) {
+                     return pq($node)->siblings()->size() == 0 ? $node : null;
+                 }
+             )->elements;
+             break;
+         case 'first-child':
+             $this->elements = $this->map(
+                 function($node) {
+                     return pq($node)->prevAll()->size() == 0 ? $node : null;
+                 }
+             )->elements;
+             break;
+         case 'last-child':
+             $this->elements = $this->map(
+                 function($node) {
+                     return pq($node)->nextAll()->size() == 0 ? $node : null;
+                 }
+             )->elements;
+             break;
+         case 'nth-child':
+             $param = trim($args, "\"'");
+             if (! $param)
+                 break;
+             $equation = null;
+             // :nth-child(index/even/odd/equation)
+             if ($param == 'even' || $param == 'odd') {
+                 $mapped = $this->map(
+                     function($node) use ($param) {
+                         // 1-based position among the siblings
+                         $index = pq($node)->prevAll()->size()+1;
+                         if ($param == "even" && ($index%2) == 0)
+                             return $node;
+                         else if ($param == "odd" && $index%2 == 1)
+                             return $node;
+                         else
+                             return null;
+                     }
+                 );
+             } else if (preg_match('@^([+-]?\d*)n(?:([+-])(\d+))?$@', str_replace(' ', '', $param), $equation)) {
+                 // an+b; a bare "n", "+n" or "-n" means a = 1 or -1
+                 if ($equation[1] === '' || $equation[1] === '+')
+                     $a = 1;
+                 else if ($equation[1] === '-')
+                     $a = -1;
+                 else
+                     $a = intval($equation[1]);
+                 $b = isset($equation[3])
+                     ? intval($equation[2].$equation[3])
+                     : 0;
+                 $mapped = $this->map(
+                     function($node) use ($a, $b) {
+                         $index = pq($node)->prevAll()->size()+1;
+                         $offset = $index-$b;
+                         if ($a == 0)
+                             return $offset == 0
+                                 ? $node
+                                 : null;
+                         // index = a*n+b needs n = offset/a to be a whole number >= 0
+                         return $offset%$a == 0 && $offset/$a >= 0
+                             ? $node
+                             : null;
+                     }
+                 );
+             } else {
+                 // index
+                 $index = intval($param);
+                 $mapped = $this->map(
+                     function($node) use ($index) {
+                         return pq($node)->prevAll()->size()+1 == $index
+                             ? $node
+                             : null;
+                     }
+                 );
+             }
+             $this->elements = $mapped->elements;
+             break;
+         default:
+             $this->debug("Unknown pseudoclass '{$class}', skipping...");
+     }
+ }
+ /**
+ * Placeholder for pseudo-class argument parsing; not implemented yet,
+ * the body is empty and nothing is returned.
+ *
+ * @access private
+ * @param string $paramsString Currently unused.
+ */
+ protected function __pseudoClassParam($paramsString) {
+ // TODO;
+ }
+ /**
+ * Checks nodes against a selector without changing the current stack.
+ *
+ * Without $nodes the current stack is tested and a boolean is returned.
+ * With $nodes (a single node or an array of nodes) those are tested
+ * instead, and the matching nodes are returned as an array, or null
+ * when none matched.
+ *
+ * @param string $selector Selector; an empty one yields false.
+ * @param DOMNode|array|null $nodes Optional node(s) to test.
+ * @return bool|array|null
+ */
+ public function is($selector, $nodes = null) {
+     phpQuery::debug(array("Is:", $selector));
+     if (! $selector)
+         return false;
+     $savedStack = $this->elements;
+     if ($nodes) {
+         $this->elements = is_array($nodes)
+             ? $nodes
+             : array($nodes);
+     }
+     // filter in place (no history), grab the outcome, then put the stack back
+     $this->filter($selector, true);
+     $matched = $this->elements;
+     $this->elements = $savedStack;
+     if (! $nodes)
+         return (bool)count($matched);
+     return $matched ? $matched : null;
+ }
+ /**
+ * Filters the stack with a callback.
+ * jQuery difference.
+ *
+ * The callback is run through phpQuery::callbackRun() with:
+ * - $index int
+ * - $node DOMNode
+ * A node is kept when the callback returns null or any truthy value.
+ *
+ * @param mixed $callback Callback to run for every node.
+ * @param bool $_skipHistory When true the stack is changed in place and
+ * $this is returned instead of a new instance.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @link http://docs.jquery.com/Traversing/filter
+ */
+ public function filterCallback($callback, $_skipHistory = false) {
+     if (! $_skipHistory) {
+         $this->elementsBackup = $this->elements;
+         $this->debug("Filtering by callback");
+     }
+     $kept = array();
+     foreach($this->elements as $index => $node) {
+         $verdict = phpQuery::callbackRun($callback, array($index, $node));
+         if (! is_null($verdict) && ! $verdict)
+             continue;
+         $kept[] = $node;
+     }
+     $this->elements = $kept;
+     if ($_skipHistory)
+         return $this;
+     return $this->newInstance();
+ }
+ /**
+ * Reduces the stack to the nodes matching the given selector(s).
+ *
+ * Callback and Closure arguments are delegated to filterCallback().
+ * Otherwise every comma-separated selector is checked token by token
+ * against each node (id, class, attribute, tag), after which its
+ * pseudo-class tokens are applied to the surviving set. Results of the
+ * individual selectors are concatenated; duplicates are not removed.
+ * Only simple selectors are supported: a combinator inside a selector
+ * makes the node fail.
+ *
+ * Attribute tokens are split at the first "=" only, so values that
+ * themselves contain "=" are no longer truncated.
+ *
+ * @param string|array|Callback|Closure $selectors Selector string, already
+ * parsed token lists, or a callback.
+ * @param bool $_skipHistory When true the stack is changed in place and
+ * $this is returned instead of a new instance.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @link http://docs.jquery.com/Traversing/filter
+ */
+ public function filter($selectors, $_skipHistory = false) {
+     if ($selectors instanceof Callback OR $selectors instanceof Closure)
+         return $this->filterCallback($selectors, $_skipHistory);
+     if (! $_skipHistory)
+         $this->elementsBackup = $this->elements;
+     $notSimpleSelector = array(' ', '>', '~', '+', '/');
+     if (! is_array($selectors))
+         $selectors = $this->parseSelector($selectors);
+     if (! $_skipHistory)
+         $this->debug(array("Filtering:", $selectors));
+     $finalStack = array();
+     foreach($selectors as $selector) {
+         $stack = array();
+         if (! $selector)
+             break;
+         // avoid first space or /
+         if (in_array($selector[0], $notSimpleSelector))
+             $selector = array_slice($selector, 1);
+         // PER NODE selector chunks
+         foreach($this->stack() as $node) {
+             $break = false;
+             foreach($selector as $s) {
+                 if (!($node instanceof DOMELEMENT)) {
+                     // all besides DOMElement: only [nodeType=N] can match
+                     if ( $s[0] == '[') {
+                         $attr = trim($s, '[]');
+                         if ( mb_strpos($attr, '=')) {
+                             list( $attr, $val ) = explode('=', $attr, 2);
+                             if ($attr == 'nodeType' && $node->nodeType != $val)
+                                 $break = true;
+                         }
+                     } else
+                         $break = true;
+                 } else {
+                     // DOMElement only
+                     // ID
+                     if ( $s[0] == '#') {
+                         if ( $node->getAttribute('id') != substr($s, 1) )
+                             $break = true;
+                     // CLASSES
+                     } else if ( $s[0] == '.') {
+                         if (! $this->matchClasses( $s, $node ) )
+                             $break = true;
+                     // ATTRS
+                     } else if ( $s[0] == '[') {
+                         // strip side brackets
+                         $attr = trim($s, '[]');
+                         if (mb_strpos($attr, '=')) {
+                             // split at the first '=' only; the value may contain more of them
+                             list($attr, $val) = explode('=', $attr, 2);
+                             $val = self::unQuote($val);
+                             if ($attr == 'nodeType') {
+                                 if ($val != $node->nodeType)
+                                     $break = true;
+                             } else if ($this->isRegexp($attr)) {
+                                 $val = extension_loaded('mbstring') && phpQuery::$mbstringSupport
+                                     ? quotemeta(trim($val, '"\''))
+                                     : preg_quote(trim($val, '"\''), '@');
+                                 // switch last character
+                                 switch( substr($attr, -1)) {
+                                     // quotemeta used insted of preg_quote
+                                     // http://code.google.com/p/phpquery/issues/detail?id=76
+                                     case '^':
+                                         $pattern = '^'.$val;
+                                         break;
+                                     case '*':
+                                         $pattern = '.*'.$val.'.*';
+                                         break;
+                                     case '$':
+                                         $pattern = '.*'.$val.'$';
+                                         break;
+                                 }
+                                 // cut last character
+                                 $attr = substr($attr, 0, -1);
+                                 $isMatch = extension_loaded('mbstring') && phpQuery::$mbstringSupport
+                                     ? mb_ereg_match($pattern, $node->getAttribute($attr))
+                                     : preg_match("@{$pattern}@", $node->getAttribute($attr));
+                                 if (! $isMatch)
+                                     $break = true;
+                             } else if ($node->getAttribute($attr) != $val)
+                                 $break = true;
+                         } else if (! $node->hasAttribute($attr))
+                             $break = true;
+                     // PSEUDO CLASSES
+                     } else if ( $s[0] == ':') {
+                         // skip; handled for the whole set further down
+                     // TAG
+                     } else if (trim($s)) {
+                         if ($s != '*') {
+                             // TODO namespaces
+                             if (isset($node->tagName)) {
+                                 if ($node->tagName != $s)
+                                     $break = true;
+                             } else if ($s == 'html' && ! $this->isRoot($node))
+                                 $break = true;
+                         }
+                     // AVOID NON-SIMPLE SELECTORS
+                     } else if (in_array($s, $notSimpleSelector)) {
+                         $break = true;
+                         $this->debug(array('Skipping non simple selector', $selector));
+                     }
+                 }
+                 if ($break)
+                     break;
+             }
+             // if element passed all chunks of selector - add it to new stack
+             if (! $break )
+                 $stack[] = $node;
+         }
+         // pseudo-classes work on $this->elements, so swap the per-selector
+         // result in and restore the original stack afterwards
+         $tmpStack = $this->elements;
+         $this->elements = $stack;
+         // PER ALL NODES selector chunks
+         foreach($selector as $s)
+             // PSEUDO CLASSES
+             if ($s[0] == ':')
+                 $this->pseudoClasses($s);
+         foreach($this->elements as $node)
+             // XXX it should be merged without duplicates
+             // but jQuery doesnt do that
+             $finalStack[] = $node;
+         $this->elements = $tmpStack;
+     }
+     $this->elements = $finalStack;
+     if ($_skipHistory) {
+         return $this;
+     } else {
+         $this->debug("Stack length after filter(): ".count($finalStack));
+         return $this->newInstance();
+     }
+ }
+ /**
+  * Strips the surrounding quotes from a quoted value.
+  *
+  * Only the first character is inspected: when it is a single or double
+  * quote, the first and last characters are dropped. The closing quote is
+  * assumed, not verified. Any other value is returned unchanged.
+  *
+  * @param string $value Raw, possibly quoted value.
+  * @return string Value without its surrounding quotes.
+  * @TODO implement in all methods using passed parameters
+  */
+ protected static function unQuote($value) {
+     // An empty string has no offset 0; reading it raises an
+     // "uninitialized string offset" error, so return it untouched.
+     if (! is_string($value) || $value === '')
+         return $value;
+     return $value[0] == '\'' || $value[0] == '"'
+         ? substr($value, 1, -1)
+         : $value;
+ }
+ /**
+  * Requests $url through phpQuery::ajax() and registers __loadSuccess()
+  * as the success handler.
+  *
+  * The URL may carry a selector after the first space ("page.html #main").
+  * The selector is stored in $this->_loadSelector for __loadSuccess().
+  *
+  * @link http://docs.jquery.com/Ajax/load
+  * @param string $url URL, optionally followed by a space and a selector.
+  * @param array|mixed $data Request data; a non-array truthy value is
+  *        treated as the callback. The request is a POST when data is
+  *        given, a GET otherwise.
+  * @param mixed $callback Passed to phpQuery::ajax() as 'complete'.
+  * @return phpQuery|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function load($url, $data = null, $callback = null) {
+     if ($data && ! is_array($data)) {
+         $callback = $data;
+         $data = null;
+     }
+     if (mb_strpos($url, ' ') !== false) {
+         $matches = null;
+         if (extension_loaded('mbstring') && phpQuery::$mbstringSupport)
+             mb_ereg('^([^ ]+) (.*)$', $url, $matches);
+         else
+             // PCRE patterns need delimiters; without them preg_match()
+             // fails and leaves $matches empty.
+             preg_match('@^([^ ]+) (.*)$@', $url, $matches);
+         // Split only on a real match (e.g. not for a URL with a leading
+         // space), otherwise keep the URL as given.
+         if (isset($matches[1], $matches[2])) {
+             $url = $matches[1];
+             $selector = $matches[2];
+             // FIXME this sucks, pass as callback param
+             $this->_loadSelector = $selector;
+         }
+     }
+     $ajax = array(
+         'url' => $url,
+         'type' => $data ? 'POST' : 'GET',
+         'data' => $data,
+         'complete' => $callback,
+         'success' => array($this, '__loadSuccess')
+     );
+     phpQuery::ajax($ajax);
+     return $this;
+ }
+ /**
+  * Success handler registered by load(): writes the received markup into
+  * every node returned by stack(1).
+  *
+  * When load() stored a selector, the response is first parsed as a new
+  * document and narrowed to the matching part; the selector is then unset.
+  *
+  * @access private
+  * @param string $html Response markup.
+  * @return void
+  */
+ public function __loadSuccess($html) {
+     $selector = $this->_loadSelector;
+     if ($selector) {
+         $responseDocument = phpQuery::newDocument($html);
+         $html = $responseDocument->find($selector);
+         unset($this->_loadSelector);
+     }
+     foreach ($this->stack(1) as $target) {
+         $wrapped = phpQuery::pq($target, $this->getDocumentID());
+         $wrapped->markup($html);
+     }
+ }
+ /**
+ * Placeholder for jQuery's css(); not implemented.
+ *
+ * Accepts no arguments, changes nothing and returns the current object
+ * so that call chains keep working.
+ *
+ * @return phpQuery|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @todo Implement.
+ */
+ public function css() {
+ // TODO
+ return $this;
+ }
+ /**
+ * Placeholder for jQuery's show(); not implemented.
+ * Changes nothing and returns the current object for chaining.
+ *
+ * @todo Implement.
+ */
+ public function show(){
+ // TODO
+ return $this;
+ }
+ /**
+ * Placeholder for jQuery's hide(); not implemented.
+ * Changes nothing and returns the current object for chaining.
+ *
+ * @todo Implement.
+ */
+ public function hide(){
+ // TODO
+ return $this;
+ }
+ /**
+  * Triggers an event of the given type on every element of the stack.
+  *
+  * Each node is handed to phpQueryEvents::trigger() together with the
+  * document ID, the event type and the data.
+  *
+  * @param mixed $type Event type.
+  * @param mixed $data Data forwarded to phpQueryEvents::trigger().
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @TODO support more than event in $type (space-separated)
+  */
+ public function trigger($type, $data = array()) {
+     foreach ($this->elements as $element) {
+         phpQueryEvents::trigger(
+             $this->getDocumentID(), $type, $data, $element
+         );
+     }
+     return $this;
+ }
+ /**
+  * Placeholder for jQuery's triggerHandler() (trigger bound handlers
+  * without the browser's default action); not implemented.
+  *
+  * No handler is executed. The current object is returned, as the
+  * documented return type promises and as the other unimplemented stubs
+  * (css(), show(), hide()) do, so call chains do not break on null.
+  *
+  * @param mixed $type Event type (currently ignored).
+  * @param mixed $data Event data (currently ignored).
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @TODO Implement.
+  */
+ public function triggerHandler($type, $data = array()) {
+     // TODO
+     return $this;
+ }
+ /**
+  * Registers a handler for an event type on every element of the stack.
+  *
+  * Callable as bind($type, $callback) or bind($type, $data, $callback).
+  * When the third argument is missing or null, the second one is taken
+  * as the callback and the data becomes null.
+  *
+  * @param mixed $type Event type (e.g. 'click'); custom types allowed.
+  * @param mixed $data Optional data, or the callback in the short form.
+  * @param mixed $callback Handler passed to phpQueryEvents::add().
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @TODO support '!' (exclusive) events
+  * @TODO support more than event in $type (space-separated)
+  */
+ public function bind($type, $data, $callback = null) {
+     // TODO check if $data is callable, not using is_callable
+     if ($callback === null) {
+         // two-argument form: second argument is the handler
+         $callback = $data;
+         $data = null;
+     }
+     foreach ($this->elements as $element) {
+         phpQueryEvents::add(
+             $this->getDocumentID(), $element, $type, $data, $callback
+         );
+     }
+     return $this;
+ }
+ /**
+  * Removes event handlers from every element of the stack by calling
+  * phpQueryEvents::remove() for each node.
+  *
+  * @param mixed $type Event type; null is passed through unchanged.
+  * @param mixed $callback Handler; null is passed through unchanged.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @TODO namespace events
+  * @TODO support more than event in $type (space-separated)
+  */
+ public function unbind($type = null, $callback = null) {
+     foreach ($this->elements as $element) {
+         phpQueryEvents::remove(
+             $this->getDocumentID(), $element, $type, $callback
+         );
+     }
+     return $this;
+ }
+ /**
+  * Binds a 'change' handler when a callback is given, otherwise triggers
+  * the 'change' event on the stack.
+  *
+  * @param mixed $callback Optional handler.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function change($callback = null) {
+     return $callback
+         ? $this->bind('change', $callback)
+         : $this->trigger('change');
+ }
+ /**
+  * Binds a 'submit' handler when a callback is given, otherwise triggers
+  * the 'submit' event on the stack.
+  *
+  * @param mixed $callback Optional handler.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function submit($callback = null) {
+     return $callback
+         ? $this->bind('submit', $callback)
+         : $this->trigger('submit');
+ }
+ /**
+  * Binds a 'click' handler when a callback is given, otherwise triggers
+  * the 'click' event on the stack.
+  *
+  * @param mixed $callback Optional handler.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function click($callback = null) {
+     return $callback
+         ? $this->bind('click', $callback)
+         : $this->trigger('click');
+ }
+ /**
+  * Older implementation of wrapAll().
+  *
+  * Clones the wrapper, inserts the clone before the first element of the
+  * stack, descends through first children for as long as they are
+  * elements, and appends the whole stack to the deepest one found.
+  * Does nothing when the wrapper or the stack is empty.
+  *
+  * @param String|phpQuery $wrapper
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrapAllOld($wrapper) {
+     $wrapperClone = pq($wrapper)->_clone();
+     if (! ($wrapperClone->length() && $this->length())) {
+         return $this;
+     }
+     $wrapperClone->insertBefore($this->elements[0]);
+     $innermost = $wrapperClone->elements[0];
+     while ($innermost->firstChild && $innermost->firstChild instanceof DOMELEMENT) {
+         $innermost = $innermost->firstChild;
+     }
+     pq($innermost)->append($this);
+     return $this;
+ }
+ /**
+  * Wraps the whole stack in a single copy of the wrapper.
+  *
+  * The wrapper is cloned and inserted before the first element of the
+  * stack. Each wrapper node is mapped to its deepest first-child element
+  * through ___wrapAllCallback(), and the stack is appended there.
+  * An empty stack is returned unchanged.
+  *
+  * TODO testme...
+  * @param String|phpQuery $wrapper
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrapAll($wrapper) {
+     if (! $this->length()) {
+         return $this;
+     }
+     $wrapperClone = phpQuery::pq($wrapper, $this->getDocumentID())->clone();
+     $placed = $wrapperClone->insertBefore($this->get(0));
+     $innermost = $placed->map(array($this, '___wrapAllCallback'));
+     return $innermost->append($this);
+ }
+ /**
+  * map() callback used by wrapAll(): follows the chain of first children
+  * for as long as the first child is an element.
+  *
+  * @param DOMNode $node Node to start from.
+  * @return DOMNode Last node reached; $node itself when its first child
+  *         is missing or is not an element.
+  * @access private
+  */
+ public function ___wrapAllCallback($node) {
+     $current = $node;
+     while (true) {
+         $child = $current->firstChild;
+         if (! $child || ! ($child instanceof DOMELEMENT)) {
+             break;
+         }
+         $current = $child;
+     }
+     return $current;
+ }
+ /**
+  * Surrounds the stack with PHP code: beforePHP() is applied to the first
+  * element and afterPHP() to the last one.
+  * NON JQUERY METHOD
+  *
+  * @param string $codeBefore Code passed to beforePHP().
+  * @param string $codeAfter Code passed to afterPHP().
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrapAllPHP($codeBefore, $codeAfter) {
+     $afterOpening = $this
+         ->slice(0, 1)
+         ->beforePHP($codeBefore)
+         ->end();
+     $lastOnly = $afterOpening->slice(-1);
+     return $lastOnly
+         ->afterPHP($codeAfter)
+         ->end();
+ }
+ /**
+  * Wraps every node of the stack separately by calling wrapAll() on a
+  * one-node object built for it.
+  *
+  * @param String|phpQuery $wrapper
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrap($wrapper) {
+     foreach ($this->stack() as $element) {
+         $single = phpQuery::pq($element, $this->getDocumentID());
+         $single->wrapAll($wrapper);
+     }
+     return $this;
+ }
+ /**
+  * Surrounds every node of the stack separately with PHP code by calling
+  * wrapAllPHP() on a one-node object built for it.
+  *
+  * @param string $codeBefore Code placed before each node.
+  * @param string $codeAfter Code placed after each node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrapPHP($codeBefore, $codeAfter) {
+     foreach ($this->stack() as $element) {
+         $single = phpQuery::pq($element, $this->getDocumentID());
+         $single->wrapAllPHP($codeBefore, $codeAfter);
+     }
+     return $this;
+ }
+ /**
+  * Wraps the contents of every node of the stack: for each node,
+  * wrapAll() is called on what contents() returns for it.
+  *
+  * @param String|phpQuery $wrapper
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrapInner($wrapper) {
+     foreach ($this->stack() as $element) {
+         $inner = phpQuery::pq($element, $this->getDocumentID())->contents();
+         $inner->wrapAll($wrapper);
+     }
+     return $this;
+ }
+ /**
+  * Surrounds the contents of every node returned by stack(1) with PHP
+  * code: for each node, wrapAllPHP() is called on its contents().
+  *
+  * @param string $codeBefore Code placed before the contents.
+  * @param string $codeAfter Code placed after the contents.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function wrapInnerPHP($codeBefore, $codeAfter) {
+     foreach ($this->stack(1) as $element) {
+         $inner = phpQuery::pq($element, $this->getDocumentID())->contents();
+         $inner->wrapAllPHP($codeBefore, $codeAfter);
+     }
+     return $this;
+ }
+ /**
+  * Collects all child nodes (of any node type) of the nodes returned by
+  * stack(1) and returns them as a new instance.
+  *
+  * See http://code.google.com/p/phpquery/issues/detail?id=56 for the
+  * childNodes check that used to be here.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @testme Support for text nodes
+  */
+ public function contents() {
+     $collected = array();
+     foreach ($this->stack(1) as $parent) {
+         foreach ($parent->childNodes as $child) {
+             $collected[] = $child;
+         }
+     }
+     return $this->newInstance($collected);
+ }
+ /**
+  * Replaces every node returned by stack(1) with its own children: the
+  * children are moved in front of the node, in order, and the node is
+  * then removed from its parent. Nodes without a parent are skipped.
+  *
+  * jQuery difference.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function contentsUnwrap() {
+     foreach ($this->stack(1) as $wrapper) {
+         $parent = $wrapper->parentNode;
+         if (! $parent) {
+             continue;
+         }
+         // childNodes is live and changes while nodes are moved, so take
+         // a snapshot before touching the tree
+         $snapshot = array();
+         foreach ($wrapper->childNodes as $child) {
+             $snapshot[] = $child;
+         }
+         foreach ($snapshot as $child) {
+             $parent->insertBefore($child, $wrapper);
+         }
+         $parent->removeChild($wrapper);
+     }
+     return $this;
+ }
+ /**
+ * Replaces every node returned by stack(1) with a clone of $markup that
+ * has the node's former contents appended to it.
+ *
+ * For each node, contents() is captured into $content by toReference()
+ * and the node is then replaced through replaceWith().
+ * NOTE(review): toReference() is not visible here; it presumably assigns
+ * the current object to $content by reference. Confirm.
+ *
+ * jQuery difference.
+ *
+ * @param String|phpQuery $markup Replacement markup or object.
+ * @return phpQueryObject The current object.
+ */
+ public function switchWith($markup) {
+ $markup = pq($markup, $this->getDocumentID());
+ $content = null;
+ foreach($this->stack(1) as $node) {
+ pq($node)
+ ->contents()->toReference($content)->end()
+ ->replaceWith($markup->clone()->append($content));
+ }
+ return $this;
+ }
+ /**
+  * Reduces the stack to the element stored under the given key.
+  *
+  * The current stack is saved in $this->elementsBackup first. When the
+  * key does not exist, the resulting stack is empty.
+  *
+  * @param int $num Key of the wanted element in the stack.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function eq($num) {
+     $previous = $this->elements;
+     $this->elementsBackup = $previous;
+     $this->elements = isset($previous[$num])
+         ? array($previous[$num])
+         : array();
+     return $this->newInstance();
+ }
+ /**
+ * Returns the number of nodes currently in the stack.
+ *
+ * @return int
+ */
+ public function size() {
+ return count($this->elements);
+ }
+ /**
+ * Returns the number of nodes in the stack; alias of size().
+ *
+ * @return int
+ * @deprecated Use length as attribute
+ */
+ public function length() {
+ return $this->size();
+ }
+ /**
+ * Returns the number of nodes in the stack; alias of size().
+ *
+ * @return int
+ */
+ public function count() {
+ return $this->size();
+ }
+ /**
+  * Returns the object stored in $this->previous, or the current object
+  * when there is none.
+  *
+  * @param int $level Not used yet.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @todo $level
+  */
+ public function end($level = 1) {
+     if ($this->previous) {
+         return $this->previous;
+     }
+     return $this;
+ }
+ /**
+  * Deep-clones every node of the stack.
+  * Normal use ->clone() .
+  *
+  * The current stack is saved in $this->elementsBackup, replaced by the
+  * clones, and newInstance() is returned.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @access private
+  */
+ public function _clone() {
+     $this->elementsBackup = $this->elements;
+     $copies = array();
+     foreach ($this->elements as $original) {
+         // true = clone the whole subtree
+         $copies[] = $original->cloneNode(true);
+     }
+     $this->elements = $copies;
+     return $this->newInstance();
+ }
+ /**
+ * Replaces the stack with PHP code: the code is converted by
+ * phpQuery::php() and passed to replaceWith().
+ *
+ * @param string $code PHP code.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function replaceWithPHP($code) {
+ return $this->replaceWith(phpQuery::php($code));
+ }
+ /**
+ * Replaces the stack with the given content: the content is inserted
+ * with after() and the stack is then removed with remove().
+ *
+ * @param String|phpQuery $content
+ * @link http://docs.jquery.com/Manipulation/replaceWith#content
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function replaceWith($content) {
+ return $this->after($content)->remove();
+ }
+ /**
+  * Replaces every node matched by the selector with a clone of the
+  * current stack: the clone is inserted after the node, which is then
+  * removed.
+  *
+  * @param String $selector
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @todo this works ?
+  */
+ public function replaceAll($selector) {
+     $targets = phpQuery::pq($selector, $this->getDocumentID());
+     foreach ($targets as $target) {
+         $wrapped = phpQuery::pq($target, $this->getDocumentID());
+         $wrapped->after($this->_clone())->remove();
+     }
+     return $this;
+ }
+ /**
+  * Detaches nodes from their parents and triggers a 'DOMNodeRemoved'
+  * event for each of them.
+  *
+  * With a selector, only the nodes left by filter($selector) are
+  * removed; otherwise the whole stack. Nodes without a parent are
+  * skipped.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function remove($selector = null) {
+     if ($selector) {
+         $doomed = $this->filter($selector)->elements;
+     } else {
+         $doomed = $this->elements;
+     }
+     foreach ($doomed as $target) {
+         if (! $target->parentNode) {
+             continue;
+         }
+         if (isset($target->tagName)) {
+             $this->debug("Removing '{$target->tagName}'");
+         }
+         $target->parentNode->removeChild($target);
+         // Mutation event
+         $mutation = new DOMEvent(array(
+             'target' => $target,
+             'type' => 'DOMNodeRemoved'
+         ));
+         phpQueryEvents::trigger(
+             $this->getDocumentID(), $mutation->type, array($mutation), $target
+         );
+     }
+     return $this;
+ }
+ /**
+  * Triggers a 'change' event on a textarea whose markup has changed.
+  * Other tags, or unchanged markup, trigger nothing.
+  *
+  * @param mixed $newMarkup Markup after the change.
+  * @param mixed $oldMarkup Markup before the change.
+  * @param DOMNode $node Node whose markup was set.
+  * @return void
+  */
+ protected function markupEvents($newMarkup, $oldMarkup, $node) {
+     if ($node->tagName != 'textarea' || $newMarkup == $oldMarkup) {
+         return;
+     }
+     $changeEvent = new DOMEvent(array(
+         'target' => $node,
+         'type' => 'change'
+     ));
+     phpQueryEvents::trigger(
+         $this->getDocumentID(), $changeEvent->type, array($changeEvent), $node
+     );
+ }
+ /**
+  * Forwards all arguments to xml() for XML documents and to html()
+  * otherwise, as decided by $this->documentWrapper->isXML.
+  *
+  * jQuery difference.
+  *
+  * @param mixed $markup Markup to set; omit to fetch.
+  * @return mixed Whatever xml() / html() returns.
+  * @TODO trigger change event for textarea
+  */
+ public function markup($markup = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+     $arguments = func_get_args();
+     $method = $this->documentWrapper->isXML ? 'xml' : 'html';
+     return call_user_func_array(array($this, $method), $arguments);
+ }
+ /**
+  * Forwards all arguments to xmlOuter() for XML documents and to
+  * htmlOuter() otherwise, as decided by $this->documentWrapper->isXML.
+  *
+  * jQuery difference.
+  *
+  * @return mixed Whatever xmlOuter() / htmlOuter() returns.
+  */
+ public function markupOuter($callback1 = null, $callback2 = null, $callback3 = null) {
+     $arguments = func_get_args();
+     $method = $this->documentWrapper->isXML ? 'xmlOuter' : 'htmlOuter';
+     return call_user_func_array(array($this, $method), $arguments);
+ }
+ /**
+ * Sets or fetches the inner markup of the stack.
+ *
+ * With $html set: the markup is imported through the document wrapper,
+ * the stack is emptied, and the imported nodes are appended to every
+ * node returned by stack(1). Nodes under a non-zero key receive deep
+ * clones. In (X)HTML documents markupEvents() is called for textarea
+ * nodes. Returns the current object.
+ *
+ * Without $html: returns the document wrapper's markup for the stack
+ * (second argument true), passed through each additional argument as a
+ * callback via phpQuery::callbackRun().
+ *
+ * @param mixed $html Markup to insert; omit or pass null to fetch.
+ * @return string|phpQuery|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @TODO force html result
+ */
+ public function html($html = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+ if (isset($html)) {
+ // INSERT
+ $nodes = $this->documentWrapper->import($html);
+ $this->empty();
+ foreach($this->stack(1) as $alreadyAdded => $node) {
+ // for now, limit events for textarea. NOTE(review): the old markup is read after empty() above - confirm it is not always empty
+ if (($this->isXHTML() || $this->isHTML()) && $node->tagName == 'textarea')
+ $oldHtml = pq($node, $this->getDocumentID())->markup();
+ foreach($nodes as $newNode) {
+ $node->appendChild($alreadyAdded
+ ? $newNode->cloneNode(true)
+ : $newNode
+ );
+ }
+ // for now, limit events for textarea
+ if (($this->isXHTML() || $this->isHTML()) && $node->tagName == 'textarea')
+ $this->markupEvents($html, $oldHtml, $node);
+ }
+ return $this;
+ } else {
+ // FETCH
+ $return = $this->documentWrapper->markup($this->elements, true);
+ $args = func_get_args();
+ foreach(array_slice($args, 1) as $callback) {
+ $return = phpQuery::callbackRun($callback, array($return));
+ }
+ return $return;
+ }
+ }
+ /**
+  * XML counterpart of html(); currently forwards all arguments to
+  * html() unchanged.
+  *
+  * @param mixed $xml Markup to set; omit to fetch.
+  * @return mixed Whatever html() returns.
+  * @TODO force xml result
+  */
+ public function xml($xml = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+     $arguments = func_get_args();
+     $target = array($this, 'html');
+     return call_user_func_array($target, $arguments);
+ }
+ /**
+  * Returns the document wrapper's markup for the stack, passed through
+  * every given argument as a callback via phpQuery::callbackRun().
+  *
+  * @TODO force html result
+  *
+  * @return String
+  */
+ public function htmlOuter($callback1 = null, $callback2 = null, $callback3 = null) {
+     $result = $this->documentWrapper->markup($this->elements);
+     // pass through callbacks, in argument order
+     $callbacks = func_get_args();
+     foreach ($callbacks as $filter) {
+         $result = phpQuery::callbackRun($filter, array($result));
+     }
+     return $result;
+ }
+ /**
+  * XML counterpart of htmlOuter(); currently forwards all arguments to
+  * htmlOuter() unchanged.
+  *
+  * @return mixed Whatever htmlOuter() returns.
+  * @TODO force xml result
+  */
+ public function xmlOuter($callback1 = null, $callback2 = null, $callback3 = null) {
+     $arguments = func_get_args();
+     $target = array($this, 'htmlOuter');
+     return call_user_func_array($target, $arguments);
+ }
+ /**
+ * String conversion: returns the result of markupOuter().
+ *
+ * @return string
+ */
+ public function __toString() {
+ return $this->markupOuter();
+ }
+ /**
+ * Alias of markupPHP().
+ * Just like html(), but returns markup with VALID (dangerous) PHP tags.
+ *
+ * @param string $code PHP code to set; omit to fetch.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @todo support returning markup with PHP tags when called without param
+ */
+ public function php($code = null) {
+ return $this->markupPHP($code);
+ }
+ /**
+  * Sets or fetches the inner markup as PHP code.
+  *
+  * With $code: the code is converted by phpQuery::php() and set through
+  * markup(). Without: the current markup() is converted by
+  * phpQuery::markupToPHP() and returned.
+  *
+  * @param string $code PHP code to set; omit or pass null to fetch.
+  * @return mixed Result of markup() when setting, converted markup
+  *         otherwise.
+  */
+ public function markupPHP($code = null) {
+     if (isset($code)) {
+         return $this->markup(phpQuery::php($code));
+     }
+     return phpQuery::markupToPHP($this->markup());
+ }
+ /**
+ * Returns markupOuter() converted by phpQuery::markupToPHP().
+ *
+ * Takes no parameters.
+ * @return mixed Result of phpQuery::markupToPHP().
+ */
+ public function markupOuterPHP() {
+ return phpQuery::markupToPHP($this->markupOuter());
+ }
+ /**
+  * Collects the direct element children (nodeType 1) of the nodes
+  * returned by stack(1).
+  *
+  * With a selector, only children accepted by is($selector, $child) are
+  * kept. Each node is added once. The current stack is saved in
+  * $this->elementsBackup before being replaced.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function children($selector = null) {
+     $matched = array();
+     foreach ($this->stack(1) as $parent) {
+         foreach ($parent->childNodes as $child) {
+             if ($child->nodeType == 1
+                 && (! $selector || $this->is($selector, $child))
+                 && ! $this->elementsContainsNode($child, $matched)
+             ) {
+                 $matched[] = $child;
+             }
+         }
+     }
+     $this->elementsBackup = $this->elements;
+     $this->elements = $matched;
+     return $this->newInstance();
+ }
+ /**
+ * Returns children($selector).
+ *
+ * NOTE(review): despite its name this delegates to children(), so it
+ * yields child elements rather than ancestors. Confirm this is intended.
+ *
+ * @param string $selector Optional filter passed to children().
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function ancestors($selector = null) {
+ return $this->children( $selector );
+ }
+ /**
+  * Appends content to the stack; delegates to insert() with type
+  * 'append'.
+  *
+  * @param mixed $content Markup string, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function append( $content) {
+     $type = 'append';
+     return $this->insert($content, $type);
+ }
+ /**
+  * Appends PHP code to the stack.
+  *
+  * The code is converted to insertable markup by phpQuery::php(), the
+  * same helper replaceWithPHP() and markupPHP() use, and handed to
+  * insert() with type 'append'. The previous body inserted an empty
+  * string and silently dropped $content.
+  *
+  * @param string $content PHP code.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function appendPHP( $content) {
+     return $this->insert(phpQuery::php($content), 'append');
+ }
+ /**
+  * Appends the stack to the given target; delegates to insert() with
+  * type 'appendTo'.
+  *
+  * @param mixed $seletor Selector, markup, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function appendTo( $seletor) {
+     $type = 'appendTo';
+     return $this->insert($seletor, $type);
+ }
+ /**
+  * Prepends content to the stack; delegates to insert() with type
+  * 'prepend'.
+  *
+  * @param mixed $content Markup string, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function prepend( $content) {
+     $type = 'prepend';
+     return $this->insert($content, $type);
+ }
+ /**
+  * Prepends PHP code to the stack.
+  *
+  * The code is converted to insertable markup by phpQuery::php(), the
+  * same helper replaceWithPHP() and markupPHP() use, and handed to
+  * insert() with type 'prepend'. The previous body inserted an empty
+  * string and silently dropped $content.
+  *
+  * @todo accept many arguments, which are joined, arrays maybe also
+  * @param string $content PHP code.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function prependPHP( $content) {
+     return $this->insert(phpQuery::php($content), 'prepend');
+ }
+ /**
+  * Prepends the stack to the given target; delegates to insert() with
+  * type 'prependTo'.
+  *
+  * @param mixed $seletor Selector, markup, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function prependTo( $seletor) {
+     $type = 'prependTo';
+     return $this->insert($seletor, $type);
+ }
+ /**
+  * Inserts content before every node of the stack; delegates to
+  * insert() with type 'before'.
+  *
+  * @param mixed $content Markup string, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function before($content) {
+     $type = 'before';
+     return $this->insert($content, $type);
+ }
+ /**
+  * Inserts PHP code before every node of the stack.
+  *
+  * The code is converted to insertable markup by phpQuery::php(), the
+  * same helper replaceWithPHP() and markupPHP() use, and handed to
+  * insert() with type 'before'. The previous body inserted an empty
+  * string and silently dropped $content.
+  *
+  * @param string $content PHP code.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function beforePHP( $content) {
+     return $this->insert(phpQuery::php($content), 'before');
+ }
+ /**
+  * Inserts the stack before the given target; delegates to insert()
+  * with type 'insertBefore'.
+  *
+  * @param String|phpQuery $seletor Selector, markup, object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function insertBefore( $seletor) {
+     $type = 'insertBefore';
+     return $this->insert($seletor, $type);
+ }
+ /**
+  * Inserts content after every node of the stack; delegates to
+  * insert() with type 'after'.
+  *
+  * @param mixed $content Markup string, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function after( $content) {
+     $type = 'after';
+     return $this->insert($content, $type);
+ }
+ /**
+  * Inserts PHP code after every node of the stack.
+  *
+  * The code is converted to insertable markup by phpQuery::php(), the
+  * same helper replaceWithPHP() and markupPHP() use, and handed to
+  * insert() with type 'after'. The previous body inserted an empty
+  * string and silently dropped $content.
+  *
+  * @param string $content PHP code.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function afterPHP( $content) {
+     return $this->insert(phpQuery::php($content), 'after');
+ }
+ /**
+  * Inserts the stack after the given target; delegates to insert()
+  * with type 'insertAfter'.
+  *
+  * @param mixed $seletor Selector, markup, phpQuery object or DOM node.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function insertAfter( $seletor) {
+     $type = 'insertAfter';
+     return $this->insert($seletor, $type);
+ }
+ /**
+  * Internal insert method shared by append/prepend/before/after and
+  * their *To / insert* counterparts. Don't use it directly.
+  *
+  * For 'appendTo', 'prependTo', 'insertBefore' and 'insertAfter' the
+  * stack is the content and $target is the destination; for the other
+  * types the stack is the destination and $target is the content.
+  * Content inserted into more than one destination is deep-cloned for
+  * every destination whose key is non-zero. A 'DOMNodeInserted' event is
+  * triggered for every inserted node.
+  *
+  * A $target that is neither a string nor an object results in no
+  * insertion at all.
+  *
+  * @param string|phpQueryObject|DOMNode $target Markup, selector (for
+  *        the *To types), phpQuery object or DOM node.
+  * @param string $type One of append, appendTo, prepend, prependTo,
+  *        before, insertBefore, after, insertAfter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @throws Exception When a before/after style insertion meets a
+  *         destination node without a parent.
+  * @access private
+  */
+ public function insert($target, $type) {
+     $this->debug("Inserting data with '{$type}'");
+     $to = false;
+     switch ($type) {
+         case 'appendTo':
+         case 'prependTo':
+         case 'insertBefore':
+         case 'insertAfter':
+             $to = true;
+     }
+     // Initialised before the type switch so that unsupported $target
+     // types (null, numbers, arrays) fall through to empty loops
+     // instead of reading undefined variables.
+     $insertFrom = $insertTo = array();
+     switch (gettype($target)) {
+         case 'string':
+             if ($to) {
+                 // INSERT TO
+                 $insertFrom = $this->elements;
+                 if (phpQuery::isMarkup($target)) {
+                     // $target is new markup, import it
+                     $insertTo = $this->documentWrapper->import($target);
+                 } else {
+                     // $target is a selector: search from the root, then
+                     // restore the stack
+                     $thisStack = $this->elements;
+                     $this->toRoot();
+                     $insertTo = $this->find($target)->elements;
+                     $this->elements = $thisStack;
+                 }
+             } else {
+                 // INSERT FROM
+                 $insertTo = $this->elements;
+                 $insertFrom = $this->documentWrapper->import($target);
+             }
+             break;
+         case 'object':
+             // phpQuery
+             if ($target instanceof self) {
+                 if ($to) {
+                     $insertTo = $target->elements;
+                     if ($this->documentFragment && $this->stackIsRoot()) {
+                         // get all root children
+                         // TODO test it, test it hard...
+                         $loop = $this->root->childNodes;
+                     } else {
+                         $loop = $this->elements;
+                     }
+                     // import nodes if needed
+                     $insertFrom = $this->getDocumentID() == $target->getDocumentID()
+                         ? $loop
+                         : $target->documentWrapper->import($loop);
+                 } else {
+                     $insertTo = $this->elements;
+                     if ($target->documentFragment && $target->stackIsRoot()) {
+                         // get all root children
+                         $loop = $target->root->childNodes;
+                     } else {
+                         $loop = $target->elements;
+                     }
+                     // import nodes if needed
+                     $insertFrom = $this->getDocumentID() == $target->getDocumentID()
+                         ? $loop
+                         : $this->documentWrapper->import($loop);
+                 }
+             // DOMNODE
+             } elseif ($target instanceof DOMNODE) {
+                 if ($to) {
+                     $insertTo = array($target);
+                     if ($this->documentFragment && $this->stackIsRoot()) {
+                         // get all root children
+                         $loop = $this->root->childNodes;
+                     } else {
+                         $loop = $this->elements;
+                     }
+                     foreach ($loop as $fromNode) {
+                         // import nodes if needed
+                         $insertFrom[] = ! $fromNode->ownerDocument->isSameNode($target->ownerDocument)
+                             ? $target->ownerDocument->importNode($fromNode, true)
+                             : $fromNode;
+                     }
+                 } else {
+                     // import node if needed
+                     if (! $target->ownerDocument->isSameNode($this->document))
+                         $target = $this->document->importNode($target, true);
+                     $insertTo = $this->elements;
+                     $insertFrom[] = $target;
+                 }
+             }
+             break;
+     }
+     phpQuery::debug("From ".count($insertFrom)."; To ".count($insertTo)." nodes");
+     foreach ($insertTo as $insertNumber => $toNode) {
+         // The reference node is captured once per destination so that
+         // all inserted nodes keep their relative order.
+         switch ($type) {
+             case 'prependTo':
+             case 'prepend':
+                 $firstChild = $toNode->firstChild;
+                 break;
+             case 'insertAfter':
+             case 'after':
+                 $nextSibling = $toNode->nextSibling;
+                 break;
+         }
+         foreach ($insertFrom as $fromNode) {
+             // clone if inserted already before
+             $insert = $insertNumber
+                 ? $fromNode->cloneNode(true)
+                 : $fromNode;
+             switch ($type) {
+                 case 'appendTo':
+                 case 'append':
+                     $toNode->appendChild($insert);
+                     break;
+                 case 'prependTo':
+                 case 'prepend':
+                     $toNode->insertBefore(
+                         $insert,
+                         $firstChild
+                     );
+                     break;
+                 case 'insertBefore':
+                 case 'before':
+                     if (! $toNode->parentNode)
+                         throw new Exception("No parentNode, can't do {$type}()");
+                     else
+                         $toNode->parentNode->insertBefore(
+                             $insert,
+                             $toNode
+                         );
+                     break;
+                 case 'insertAfter':
+                 case 'after':
+                     if (! $toNode->parentNode)
+                         throw new Exception("No parentNode, can't do {$type}()");
+                     else
+                         $toNode->parentNode->insertBefore(
+                             $insert,
+                             $nextSibling
+                         );
+                     break;
+             }
+             // Mutation event
+             $event = new DOMEvent(array(
+                 'target' => $insert,
+                 'type' => 'DOMNodeInserted'
+             ));
+             phpQueryEvents::trigger($this->getDocumentID(),
+                 $event->type, array($event), $insert
+             );
+         }
+     }
+     return $this;
+ }
+ /**
+  * Finds the position of a node within the stack.
+  *
+  * A phpQueryObject subject is represented by its first element. The
+  * comparison uses DOMNode::isSameNode(). When the node occurs several
+  * times, the key of the last occurrence is returned.
+  *
+  * @param phpQueryObject|DOMNode $subject Node, or object holding it.
+  * @return Int Key of the node, or -1 when it is not in the stack, when
+  *         the subject object is empty or when it is not a DOM node.
+  */
+ public function index($subject) {
+     $index = -1;
+     if ($subject instanceof phpQueryObject) {
+         // an empty object has no first element to look for
+         if (! isset($subject->elements[0]))
+             return $index;
+         $subject = $subject->elements[0];
+     }
+     // isSameNode() accepts DOM nodes only
+     if (! ($subject instanceof DOMNODE))
+         return $index;
+     foreach ($this->newInstance() as $k => $node) {
+         if ($node->isSameNode($subject))
+             $index = $k;
+     }
+     return $index;
+ }
+ /**
+  * Returns a new instance holding a slice of the stack.
+  *
+  * A positive $end is an end position and is converted to a length by
+  * subtracting $start. Any other $end is passed to array_slice()
+  * unchanged as its length argument.
+  *
+  * @param int $start Offset passed to array_slice().
+  * @param int $end End position (see above); optional.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @testme
+  */
+ public function slice($start, $end = null) {
+     $length = $end;
+     if ($length > 0) {
+         $length = $length - $start;
+     }
+     $part = array_slice($this->elements, $start, $length);
+     return $this->newInstance($part);
+ }
+ /**
+  * Reverses the order of the stack.
+  *
+  * The current stack is saved in $this->elementsBackup, replaced by its
+  * reversed copy, and newInstance() is returned.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function reverse() {
+     $original = $this->elements;
+     $this->elementsBackup = $original;
+     $this->elements = array_reverse($original);
+     return $this->newInstance();
+ }
+ /**
+  * Sets or returns the text content of the stack.
+  *
+  * With $text: the text is escaped with htmlspecialchars() and set
+  * through html(). Without: the textContent of every node is joined.
+  * When the stack holds more than one node, a newline follows each
+  * non-empty text. Additional arguments are callbacks applied to each
+  * node's text via phpQuery::callbackRun().
+  *
+  * @param string $text Text to set; omit or pass null to fetch.
+  * @return String|phpQueryObject Joined text, or the result of html()
+  *         when setting.
+  */
+ public function text($text = null, $callback1 = null, $callback2 = null, $callback3 = null) {
+     if (isset($text)) {
+         return $this->html(htmlspecialchars($text));
+     }
+     $arguments = func_get_args();
+     $callbacks = array_slice($arguments, 1);
+     $joined = '';
+     foreach ($this->elements as $element) {
+         $piece = $element->textContent;
+         if (count($this->elements) > 1 && $piece) {
+             $piece .= "\n";
+         }
+         foreach ($callbacks as $filter) {
+             $piece = phpQuery::callbackRun($filter, array($piece));
+         }
+         $joined .= $piece;
+     }
+     return $joined;
+ }
+ /**
+ * Calls phpQuery::plugin() with the given class and file, then returns
+ * the current object for chaining.
+ *
+ * @param mixed $class Passed to phpQuery::plugin().
+ * @param mixed $file Passed to phpQuery::plugin(); optional.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public function plugin($class, $file = null) {
+ phpQuery::plugin($class, $file);
+ return $this;
+ }
+ /**
+  * Deprecated, use $pq->plugin() instead.
+  *
+  * Static methods have no $this, so the call is forwarded straight to
+  * phpQuery::plugin(), the call plugin() itself makes. The previous
+  * body used $this and failed with a fatal error when invoked.
+  *
+  * @deprecated
+  * @param mixed $class Passed to phpQuery::plugin().
+  * @param mixed $file Passed to phpQuery::plugin(); optional.
+  * @return mixed Whatever phpQuery::plugin() returns.
+  */
+ public static function extend($class, $file = null) {
+     return phpQuery::plugin($class, $file);
+ }
+ /**
+  * Resolves calls to methods that are not defined on the class.
+  *
+  * Lookup order:
+  *  1. phpQuery::$extendMethods - run via phpQuery::callbackRun();
+  *  2. phpQuery::$pluginsMethods - static call on the class
+  *     "phpQueryObjectPlugin_<class>"; a null result becomes $this;
+  *  3. the aliases 'clone' and 'empty' - mapped to _clone() / _empty().
+  * In the first two cases the current object is prepended to the
+  * arguments.
+  *
+  * @access private
+  * @param string $method Called method name.
+  * @param array $args Call arguments.
+  * @return mixed
+  * @throws Exception When the method cannot be resolved.
+  */
+ public function __call($method, $args) {
+     $aliasMethods = array('clone', 'empty');
+     if (isset(phpQuery::$extendMethods[$method])) {
+         array_unshift($args, $this);
+         $handler = phpQuery::$extendMethods[$method];
+         return phpQuery::callbackRun($handler, $args);
+     }
+     if (isset(phpQuery::$pluginsMethods[$method])) {
+         array_unshift($args, $this);
+         $class = phpQuery::$pluginsMethods[$method];
+         $realClass = "phpQueryObjectPlugin_$class";
+         $result = call_user_func_array(array($realClass, $method), $args);
+         // XXX deprecate ?
+         if (is_null($result)) {
+             return $this;
+         }
+         return $result;
+     }
+     if (in_array($method, $aliasMethods)) {
+         return call_user_func_array(array($this, '_'.$method), $args);
+     }
+     throw new Exception("Method '{$method}' doesnt exist");
+ }
+ /**
+  * Safe rename of next(): returns the closest following element sibling
+  * of every node of the stack, optionally filtered by a selector.
+  *
+  * Use it ONLY when next() has to be called on an object that is being
+  * iterated at the same time. Normally there is no need for that.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @access private
+  */
+ public function _next($selector = null) {
+     $following = $this->getElementSiblings('nextSibling', $selector, true);
+     return $this->newInstance($following);
+ }
+ /**
+ * Alias of prev(). Use prev() and next().
+ *
+ * @deprecated
+ * @param string $selector Optional filter passed to prev().
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @access private
+ */
+ public function _prev($selector = null) {
+ return $this->prev($selector);
+ }
+ /**
+  * Returns the closest preceding element sibling of every node of the
+  * stack, optionally filtered by a selector.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function prev($selector = null) {
+     $preceding = $this->getElementSiblings('previousSibling', $selector, true);
+     return $this->newInstance($preceding);
+ }
+ /**
+  * Returns all preceding element siblings of every node of the stack,
+  * optionally filtered by a selector.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @todo
+  */
+ public function prevAll($selector = null) {
+     $preceding = $this->getElementSiblings('previousSibling', $selector);
+     return $this->newInstance($preceding);
+ }
+ /**
+  * Returns all following element siblings of every node of the stack,
+  * optionally filtered by a selector.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @todo FIXME: returns source elements insted of next siblings
+  */
+ public function nextAll($selector = null) {
+     $following = $this->getElementSiblings('nextSibling', $selector);
+     return $this->newInstance($following);
+ }
+ /**
+  * Walks from every node of the stack in the given direction and
+  * collects the element siblings met on the way.
+  *
+  * Non-element nodes are stepped over. With a selector, the collected
+  * nodes are narrowed through filter(); the stack is swapped for the
+  * collected nodes during that call and restored afterwards.
+  *
+  * @access private
+  * @param string $direction Property to follow: 'previousSibling' or
+  *        'nextSibling'.
+  * @param string $selector Optional filter.
+  * @param bool $limitToOne Stop at the first element found per node.
+  * @return array Collected nodes.
+  */
+ protected function getElementSiblings($direction, $selector = null, $limitToOne = false) {
+     $found = array();
+     foreach ($this->stack() as $origin) {
+         $cursor = $origin;
+         while (isset($cursor->{$direction}) && $cursor->{$direction}) {
+             $cursor = $cursor->{$direction};
+             if ($cursor instanceof DOMELEMENT) {
+                 $found[] = $cursor;
+                 if ($limitToOne) {
+                     break;
+                 }
+             }
+         }
+     }
+     if (! $selector) {
+         return $found;
+     }
+     // filter() works on $this->elements, so lend it the collected
+     // nodes and put the real stack back afterwards
+     $savedElements = $this->elements;
+     $this->elements = $found;
+     $found = $this->filter($selector, true)->stack();
+     $this->elements = $savedElements;
+     return $found;
+ }
+ /**
+  * Returns all element siblings of the nodes of the stack: preceding
+  * ones first, then following ones, each node once. A selector, when
+  * given, is applied in both directions.
+  *
+  * @param string $selector Optional filter.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function siblings($selector = null) {
+     $preceding = $this->getElementSiblings('previousSibling', $selector);
+     $following = $this->getElementSiblings('nextSibling', $selector);
+     $unique = array();
+     foreach (array_merge($preceding, $following) as $candidate) {
+         if ($this->elementsContainsNode($candidate, $unique)) {
+             continue;
+         }
+         $unique[] = $candidate;
+     }
+     return $this->newInstance($unique);
+ }
+ /**
+  * Returns the nodes of the stack that do NOT match.
+  *
+  * - phpQuery object: drops every node that is also in that object;
+  * - DOM node: drops that node;
+  * - anything else: treated as a selector; drops the nodes left by
+  *   filter($selector).
+  *
+  * filter() called with its second argument true replaces
+  * $this->elements with the matched nodes, so the stack is saved and
+  * restored around that call, as getElementSiblings() does. Previously
+  * the current object was left holding only the matched nodes.
+  *
+  * @param string|phpQueryObject|DOMNode $selector What to exclude.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function not($selector = null) {
+     if (is_string($selector))
+         phpQuery::debug(array('not', $selector));
+     else
+         phpQuery::debug('not');
+     $stack = array();
+     if ($selector instanceof self) {
+         foreach ($this->stack() as $node) {
+             $matchFound = false;
+             foreach ($selector->stack() as $notNode) {
+                 if ($notNode->isSameNode($node)) {
+                     $matchFound = true;
+                     break;
+                 }
+             }
+             if (! $matchFound)
+                 $stack[] = $node;
+         }
+     } else if ($selector instanceof DOMNODE) {
+         foreach ($this->stack() as $node) {
+             if (! $selector->isSameNode($node))
+                 $stack[] = $node;
+         }
+     } else {
+         $orgStack = $this->stack();
+         // filter(..., true) overwrites $this->elements; keep a copy
+         $orgElements = $this->elements;
+         $matched = $this->filter($selector, true)->stack();
+         $this->elements = $orgElements;
+         foreach ($orgStack as $node)
+             if (! $this->elementsContainsNode($node, $matched))
+                 $stack[] = $node;
+     }
+     return $this->newInstance($stack);
+ }
+ /**
+  * Adds the nodes resolved from $selector to the current stack.
+  *
+  * @param string|phpQueryObject $selector Anything accepted by phpQuery::pq();
+  *        a falsy value leaves the object untouched.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function add($selector = null) {
+     if ($selector) {
+         $this->elementsBackup = $this->elements;
+         $additional = phpQuery::pq($selector, $this->getDocumentID());
+         // merge() skips nodes that are already on the stack.
+         $this->merge($additional->elements);
+         return $this->newInstance();
+     }
+     return $this;
+ }
+ /**
+  * Appends nodes to $this->elements, skipping nodes already present.
+  * Accepts any number of node lists as arguments.
+  *
+  * @access private
+  */
+ protected function merge() {
+     $nodeLists = func_get_args();
+     foreach ($nodeLists as $nodeList) {
+         foreach ($nodeList as $candidate) {
+             if ($this->elementsContainsNode($candidate))
+                 continue;
+             $this->elements[] = $candidate;
+         }
+     }
+ }
+ /**
+  * Tells whether a node (compared with isSameNode()) is in a list.
+  *
+  * @param DOMNode    $nodeToCheck   Node to look for.
+  * @param array|null $elementsStack List to search; $this->elements when null.
+  * @return bool
+  * @access private
+  * TODO refactor to stackContainsNode
+  */
+ protected function elementsContainsNode($nodeToCheck, $elementsStack = null) {
+     $haystack = $this->elements;
+     if (! is_null($elementsStack))
+         $haystack = $elementsStack;
+     foreach ($haystack as $member) {
+         if ($member->isSameNode($nodeToCheck))
+             return true;
+     }
+     return false;
+ }
+ /**
+  * Replaces the stack with the distinct direct parents of its nodes.
+  *
+  * @param string|null $selector Optional selector passed to filter() afterwards.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function parent($selector = null) {
+     $parents = array();
+     foreach ($this->elements as $child) {
+         $candidate = $child->parentNode;
+         // Skip parentless nodes and parents that were already collected.
+         if ($candidate && ! $this->elementsContainsNode($candidate, $parents))
+             $parents[] = $candidate;
+     }
+     $this->elementsBackup = $this->elements;
+     $this->elements = $parents;
+     if ($selector)
+         $this->filter($selector, true);
+     return $this->newInstance();
+ }
+ /**
+  * Replaces the stack with the distinct ancestors of its nodes, climbing
+  * until isRoot() reports the root (which is not included).
+  *
+  * @param string|null $selector Optional selector passed to filter() afterwards.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function parents($selector = null) {
+     $ancestors = array();
+     if (! $this->elements)
+         $this->debug('parents() - stack empty');
+     foreach ($this->elements as $start) {
+         for ($cursor = $start->parentNode; $cursor; $cursor = $cursor->parentNode) {
+             if ($this->isRoot($cursor))
+                 break;
+             // An ancestor shared by several stacked nodes is recorded once.
+             if (! $this->elementsContainsNode($cursor, $ancestors))
+                 $ancestors[] = $cursor;
+         }
+     }
+     $this->elementsBackup = $this->elements;
+     $this->elements = $ancestors;
+     if ($selector)
+         $this->filter($selector, true);
+     return $this->newInstance();
+ }
+ /**
+  * Returns the internal node stack, optionally restricted by node type.
+  *
+  * @param int|array|null $nodeTypes One nodeType value or a list of them;
+  *        when omitted the whole stack is returned as is.
+  * @return array List of DOM nodes.
+  * @access private
+  */
+ public function stack($nodeTypes = null) {
+     if (! isset($nodeTypes))
+         return $this->elements;
+     $wantedTypes = is_array($nodeTypes) ? $nodeTypes : array($nodeTypes);
+     $matching = array();
+     foreach ($this->elements as $candidate) {
+         if (in_array($candidate->nodeType, $wantedTypes))
+             $matching[] = $candidate;
+     }
+     return $matching;
+ }
+ /**
+  * Fires a 'change' event after an attribute modification that alters the
+  * state of a form control. Does nothing for documents that are neither
+  * HTML nor XHTML.
+  *
+  * @param string     $attr     Name of the attribute that was modified.
+  * @param bool       $oldAttr  Result of hasAttribute() before any changes.
+  * @param mixed      $oldValue Attribute value before the change.
+  * @param DOMElement $node     Element that was modified.
+  */
+ protected function attrEvents($attr, $oldAttr, $oldValue, $node) {
+     // skip events for XML documents
+     if (! $this->isXHTML() && ! $this->isHTML())
+         return;
+     // True when the attribute appeared or disappeared.
+     $toggled = (! $oldAttr && $node->hasAttribute($attr))
+         || (! $node->hasAttribute($attr) && $oldAttr);
+     $target = null;
+     if ($node->tagName == 'input') {
+         $type = $node->getAttribute('type');
+         $isTextual = in_array($type, array('text', 'password', 'hidden'))
+             || ! $type;
+         if ($isTextual && $attr == 'value' && $oldValue != $node->getAttribute($attr)) {
+             // value of a text-like input changed
+             $target = $node;
+         } else if (($type == 'radio' || $type == 'checkbox') && $attr == 'checked' && $toggled) {
+             // radio / checkbox was checked or un-checked
+             $target = $node;
+         }
+     } else if ($node->tagName == 'option' && $node->parentNode && $attr == 'selected' && $toggled) {
+         // option was selected or un-selected: the event targets its parent
+         $target = $node->parentNode;
+     }
+     if (! $target)
+         return;
+     $event = new DOMEvent(array(
+         'target' => $target,
+         'type' => 'change'
+     ));
+     phpQueryEvents::trigger($this->getDocumentID(),
+         $event->type, array($event), $node
+     );
+ }
+ /**
+  * Reads an attribute from the first element or writes it on all elements.
+  *
+  * Getter ($value is null): returns the attribute of the first element
+  * (null when absent), an array name => value when $attr is '*', or an
+  * empty string when the stack holds no elements.
+  * Setter: sets $attr (or every existing attribute when $attr is '*') on
+  * each element, notifies attrEvents() and returns $this.
+  *
+  * @param string|null $attr
+  * @param mixed       $value
+  * @return mixed
+  */
+ public function attr($attr = null, $value = null) {
+     $elements = $this->stack(1);
+     if (is_null($value)) {
+         foreach ($elements as $first) {
+             if ($attr == '*') {
+                 // jQuery difference
+                 $all = array();
+                 foreach ($first->attributes as $name => $attrNode)
+                     $all[$name] = $attrNode->value;
+                 return $all;
+             }
+             if ($first->hasAttribute($attr))
+                 return $first->getAttribute($attr);
+             return null;
+         }
+         return '';
+     }
+     foreach ($elements as $element) {
+         $names = array($attr);
+         if ($attr == '*')
+             $names = $this->getNodeAttrs($element);
+         foreach ($names as $name) {
+             $oldValue = $element->getAttribute($name);
+             $oldAttr = $element->hasAttribute($name);
+             // TODO raises an error when charset other than UTF-8
+             // while document's charset is also not UTF-8
+             @$element->setAttribute($name, $value);
+             $this->attrEvents($name, $oldAttr, $oldValue, $element);
+         }
+     }
+     return $this;
+ }
+ /**
+  * Lists the attribute names of a node.
+  *
+  * @param DOMNode $node
+  * @return array Attribute names in iteration order of $node->attributes.
+  * @access private
+  */
+ protected function getNodeAttrs($node) {
+     $names = array();
+     foreach ($node->attributes as $name => $attrNode) {
+         $names[] = $name;
+     }
+     return $names;
+ }
+ /**
+  * Sets an attribute to a PHP code block, or reads an attribute.
+  *
+  * With $code given, every element gets $attr set to the code wrapped in
+  * PHP open/close tags and $this is returned. With $code null, the value of
+  * $attr on the first element is returned (an array name => value for '*');
+  * $this is returned when the stack holds no elements.
+  *
+  * @param string      $attr
+  * @param string|null $code
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @todo check CDATA ???
+  */
+ public function attrPHP($attr, $code) {
+     $elements = $this->stack(1);
+     if (! is_null($code)) {
+         // TODO temporary solution
+         // http://code.google.com/p/phpquery/issues/detail?id=17
+         $phpTag = '<'.'?php '.$code.' ?'.'>';
+         foreach ($elements as $element)
+             $element->setAttribute($attr, $phpTag);
+         return $this;
+     }
+     foreach ($elements as $first) {
+         if ($attr != '*')
+             return $first->getAttribute($attr);
+         // jQuery diff
+         $all = array();
+         foreach ($first->attributes as $name => $attrNode)
+             $all[$name] = $attrNode->value;
+         return $all;
+     }
+     return $this;
+ }
+ /**
+  * Removes an attribute (or all attributes when $attr is '*') from every
+  * element on the stack and notifies attrEvents() about each removal.
+  *
+  * @param string $attr Attribute name, or '*' for every attribute.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function removeAttr($attr) {
+     foreach($this->stack(1) as $node) {
+         $loop = $attr == '*'
+             ? $this->getNodeAttrs($node)
+             : array($attr);
+         foreach($loop as $a) {
+             // attrEvents() expects the hasAttribute() flag and the old value
+             // captured before the change, exactly as attr() supplies them.
+             // Passing the value as the flag missed attributes whose value
+             // is an empty string (e.g. checked="").
+             $oldAttr = $node->hasAttribute($a);
+             $oldValue = $node->getAttribute($a);
+             $node->removeAttribute($a);
+             $this->attrEvents($a, $oldAttr, $oldValue, $node);
+         }
+     }
+     return $this;
+ }
+ /**
+  * Gets or sets form element values.
+  *
+  * Getter (no argument): inspects only the first element. For a select it
+  * looks up 'option[selected=selected]' and returns its value attribute, or
+  * its text when no value attribute is present; for a textarea it returns
+  * markup(); otherwise it returns the 'value' attribute.
+  *
+  * Setter: applied to every element on the stack.
+  *  - checkbox/radio with an array $val: checked when the element's value
+  *    or name is found in $val, un-checked otherwise,
+  *  - select: each option is selected when its value attribute (or its
+  *    markup, when it has no value attribute) is found in $val,
+  *  - textarea: $val becomes the markup,
+  *  - anything else: $val is written to the 'value' attribute.
+  *
+  * @param mixed $val Value or array of values to set; omit to read.
+  * @return mixed The value when reading, $this when writing.
+  */
+ public function val($val = null) {
+     if (! isset($val)) {
+         if ($this->eq(0)->is('select')) {
+             $selected = $this->eq(0)->find('option[selected=selected]');
+             // Prefer the explicit value attribute, fall back to the option text.
+             if ($selected->is('[value]'))
+                 return $selected->attr('value');
+             else
+                 return $selected->text();
+         } else if ($this->eq(0)->is('textarea'))
+             return $this->eq(0)->markup();
+         else
+             return $this->eq(0)->attr('value');
+     } else {
+         // Normalised list of values for selects; built once, on first use.
+         $_val = null;
+         foreach($this->stack(1) as $node) {
+             // Wrap the raw DOM node so the phpQuery API can be used on it.
+             $node = pq($node, $this->getDocumentID());
+             if (is_array($val) && in_array($node->attr('type'), array('checkbox', 'radio'))) {
+                 $isChecked = in_array($node->attr('value'), $val)
+                     || in_array($node->attr('name'), $val);
+                 if ($isChecked)
+                     $node->attr('checked', 'checked');
+                 else
+                     $node->removeAttr('checked');
+             } else if ($node->get(0)->tagName == 'select') {
+                 if (! isset($_val)) {
+                     $_val = array();
+                     // A scalar is cast to string and wrapped; an array is copied.
+                     if (! is_array($val))
+                         $_val = array((string)$val);
+                     else
+                         foreach($val as $v)
+                             $_val[] = $v;
+                 }
+                 // Array access on a phpQuery object runs find() (see offsetGet()).
+                 foreach($node['option']->stack(1) as $option) {
+                     $option = pq($option, $this->getDocumentID());
+                     $selected = false;
+                     // XXX: workaround for string comparison, see issue #96
+                     // http://code.google.com/p/phpquery/issues/detail?id=96
+                     $selected = is_null($option->attr('value'))
+                         ? in_array($option->markup(), $_val)
+                         : in_array($option->attr('value'), $_val);
+                     if ($selected)
+                         $option->attr('selected', 'selected');
+                     else
+                         $option->removeAttr('selected');
+                 }
+             } else if ($node->get(0)->tagName == 'textarea')
+                 $node->markup($val);
+             else
+                 $node->attr('value', $val);
+         }
+     }
+     return $this;
+ }
+ /**
+  * Appends the elements of the previous object in the chain (if any) to
+  * the current stack.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function andSelf() {
+     if (! $this->previous)
+         return $this;
+     $earlier = $this->previous->elements;
+     $this->elements = array_merge($this->elements, $earlier);
+     return $this;
+ }
+ /**
+  * Adds a class name to every element that does not match ".$className" yet.
+  *
+  * @param string $className A falsy value is ignored.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function addClass( $className) {
+     if (! $className)
+         return $this;
+     foreach ($this->stack(1) as $element) {
+         if ($this->is(".$className", $element))
+             continue;
+         // trim() removes the leading space when the attribute was empty.
+         $combined = trim($element->getAttribute('class').' '.$className);
+         $element->setAttribute('class', $combined);
+     }
+     return $this;
+ }
+ /**
+  * Appends a PHP code block to the class attribute of every element.
+  *
+  * @param string $className PHP code placed between PHP open/close tags.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function addClassPHP( $className) {
+     $phpTag = '<'.'?php '.$className.' ?'.'>';
+     foreach ($this->stack(1) as $element) {
+         $existing = $element->getAttribute('class');
+         if ($existing)
+             $element->setAttribute('class', $existing.' '.$phpTag);
+         else
+             $element->setAttribute('class', $phpTag);
+     }
+     return $this;
+ }
+ /**
+  * Tells whether at least one element on the stack matches ".$className".
+  *
+  * @param string $className
+  * @return bool
+  */
+ public function hasClass($className) {
+     $classSelector = ".$className";
+     foreach ($this->stack(1) as $element) {
+         if ($this->is($classSelector, $element))
+             return true;
+     }
+     return false;
+ }
+ /**
+  * Removes a class name from every element that carries it; the class
+  * attribute itself is removed when nothing is left.
+  *
+  * @param string $className
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function removeClass($className) {
+     foreach ($this->stack(1) as $element) {
+         $current = explode( ' ', $element->getAttribute('class'));
+         if (! in_array($className, $current))
+             continue;
+         $remaining = array_diff($current, array($className));
+         if ($remaining) {
+             $element->setAttribute('class', implode(' ', $remaining));
+         } else {
+             $element->removeAttribute('class');
+         }
+     }
+     return $this;
+ }
+ /**
+  * Toggles a class on every element: removes it where present, adds it
+  * where absent. Each element is decided and changed on its own.
+  *
+  * @param string $className
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function toggleClass($className) {
+     foreach($this->stack(1) as $node) {
+         // Wrap the single node so the change touches only this element;
+         // calling $this->removeClass()/addClass() would hit the whole stack.
+         $single = pq($node, $this->getDocumentID());
+         // Same argument order as hasClass()/addClass(): selector first, node second.
+         if ( $this->is('.'.$className, $node))
+             $single->removeClass($className);
+         else
+             $single->addClass($className);
+     }
+     return $this;
+ }
+ /**
+  * Empties every element on the stack by assigning an empty nodeValue.
+  * The proper name without underscore (just ->empty()) also works.
+  *
+  * Example:
+  * pq("p")._empty()
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @access private
+  */
+ public function _empty() {
+     $elements = $this->stack(1);
+     foreach ($elements as $element) {
+         // thx to 'dave at dgx dot cz'
+         $element->nodeValue = '';
+     }
+     return $this;
+ }
+ /**
+  * Runs a callback once for every node on the stack.
+  *
+  * The node is handed to phpQuery::callbackRun() as the only direct
+  * argument; every argument given after $callback is forwarded as the
+  * parameter structure.
+  *
+  * @param array|string $callback
+  * @param mixed $param1 Optional extra argument.
+  * @param mixed $param2 Optional extra argument.
+  * @param mixed $param3 Optional extra argument.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function each($callback, $param1 = null, $param2 = null, $param3 = null) {
+     $extraArgs = null;
+     if (func_num_args() > 1) {
+         $allArgs = func_get_args();
+         // Everything after $callback.
+         $extraArgs = array_slice($allArgs, 1);
+     }
+     foreach ($this->elements as $element) {
+         phpQuery::callbackRun($callback, array($element), $extraArgs);
+     }
+     return $this;
+ }
+ /**
+  * Runs a callback on this object: $this replaces $callback as the first
+  * argument, the remaining arguments are passed through unchanged.
+  *
+  * @param array|string $callback
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function callback($callback, $param1 = null, $param2 = null, $param3 = null) {
+     $arguments = func_get_args();
+     // Slot 0 held the callback itself; hand the object over instead.
+     $arguments[0] = $this;
+     phpQuery::callbackRun($callback, $arguments);
+     return $this;
+ }
+ /**
+  * Passes the stack through phpQuery::map() and wraps the result.
+  *
+  * phpQuery::map() receives $this->elements followed by every argument
+  * given to this method.
+  *
+  * @param array|string $callback
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  * @todo add $scope and $args as in each() ???
+  */
+ public function map($callback, $param1 = null, $param2 = null, $param3 = null) {
+     $received = func_get_args();
+     $callArgs = array_merge(array($this->elements), $received);
+     $mapped = call_user_func_array(array('phpQuery', 'map'), $callArgs);
+     return $this->newInstance($mapped);
+ }
+ /**
+  * Reads data attached to the first node, or attaches data to every node.
+  *
+  * @param string $key
+  * @param mixed  $value Omit (or pass null) to read.
+  * @return mixed Result of phpQuery::data() when reading, $this when writing.
+  */
+ public function data($key, $value = null) {
+     if (isset($value)) {
+         foreach ($this as $node) {
+             phpQuery::data($node, $key, $value, $this->getDocumentID());
+         }
+         return $this;
+     }
+     // TODO? implement the jQuery behaviour of returning parent values
+     // when the looked-up child doesn't exist
+     return phpQuery::data($this->get(0), $key, $value, $this->getDocumentID());
+ }
+ /**
+  * Removes the data stored under $key from every node.
+  *
+  * @param string $key
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function removeData($key) {
+     foreach ($this as $node) {
+         phpQuery::removeData($node, $key, $this->getDocumentID());
+     }
+     return $this;
+ }
+ // INTERFACE IMPLEMENTATIONS
+
+ // ITERATOR INTERFACE
+ /**
+  * Iterator interface: starts a new iteration over a snapshot of the stack.
+  *
+  * @access private
+  */
+ public function rewind(){
+     $this->debug('iterating foreach');
+     $snapshot = $this->elements;
+     $this->elementsBackup = $snapshot;
+     $this->elementsInterator = $snapshot;
+     if (isset($snapshot[0]))
+         $this->valid = 1;
+     else
+         $this->valid = 0;
+     $this->current = 0;
+ }
+ /**
+  * Iterator interface: node at the current iteration position.
+  *
+  * @access private
+  */
+ public function current(){
+     $position = $this->current;
+     return $this->elementsInterator[ $position ];
+ }
+ /**
+  * Iterator interface: current iteration position.
+  *
+  * @access private
+  */
+ public function key(){
+     $position = $this->current;
+     return $position;
+ }
+ /**
+  * Double-function method.
+  *
+  * First: main iterator interface method.
+  * Second: returning next sibling, alias for _next().
+  *
+  * The mode is chosen from the iteration state: while an iteration
+  * snapshot exists the method advances it; otherwise the call is forwarded
+  * to _next().
+  *
+  * @see phpQueryObject::_next()
+  * @param string|null $cssSelector Only used when forwarding to _next().
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  *         Result of _next() in sibling mode; nothing in iterator mode.
+  */
+ public function next($cssSelector = null){
+     // Is there another element after the current position?
+     $this->valid = isset( $this->elementsInterator[ $this->current+1 ] )
+         ? true
+         : false;
+     if (! $this->valid && $this->elementsInterator) {
+         // End of the iteration: drop the snapshot so that later calls
+         // fall through to the sibling branch below.
+         $this->elementsInterator = null;
+     } else if ($this->valid) {
+         $this->current++;
+     } else {
+         // No iteration in progress: behave as the jQuery-style next().
+         return $this->_next($cssSelector);
+     }
+ }
+ /**
+  * Iterator interface: whether the current position holds an element,
+  * as recorded by rewind() and next().
+  *
+  * @access private
+  */
+ public function valid(){
+     $hasCurrent = $this->valid;
+     return $hasCurrent;
+ }
+ // ITERATOR INTERFACE END
+ // ARRAYACCESS INTERFACE
+ /**
+  * ArrayAccess: true when find($offset) yields at least one node.
+  *
+  * @access private
+  */
+ public function offsetExists($offset) {
+     $found = $this->find($offset);
+     return $found->size() > 0;
+ }
+ /**
+  * ArrayAccess: $pq['selector'] is a shortcut for $pq->find('selector').
+  *
+  * @access private
+  */
+ public function offsetGet($offset) {
+     $found = $this->find($offset);
+     return $found;
+ }
+ /**
+  * ArrayAccess: $pq['selector'] = $value sets the HTML of the nodes
+  * returned by find('selector').
+  *
+  * @access private
+  */
+ public function offsetSet($offset, $value) {
+     $targets = $this->find($offset);
+     $targets->html($value);
+ }
+ /**
+  * ArrayAccess: unsetting is not supported and always throws.
+  *
+  * @throws Exception
+  * @access private
+  */
+ public function offsetUnset($offset) {
+     $message = "Can't do unset, use array interface only for calling queries and replacing HTML.";
+     throw new Exception($message);
+ }
+ // ARRAYACCESS INTERFACE END
+ /**
+  * Builds an absolute XPath for one node or for every node on the stack.
+  *
+  * Each step is "tag[n]" where n counts the node among preceding siblings
+  * with the same tag name; XML documents use "*[local-name()='tag'][n]".
+  * A DOMDocument maps to an empty string.
+  *
+  * @param DOMNode|null $oneNode   Node to describe; the whole stack when omitted.
+  * @param mixed        $namespace Not used by this implementation.
+  * @return string|array One path when $oneNode is given, otherwise a list of paths.
+  * @TODO use native getNodePath if available
+  * @access private
+  */
+ protected function getNodeXpath($oneNode = null, $namespace = null) {
+     $paths = array();
+     $targets = $oneNode
+         ? array($oneNode)
+         : $this->elements;
+     foreach ($targets as $current) {
+         if ($current instanceof DOMDocument) {
+             $paths[] = '';
+             continue;
+         }
+         $segments = array();
+         // Climb towards the document, recording one step per level.
+         while (! ($current instanceof DOMDocument)) {
+             $position = 1;
+             for ($sibling = $current->previousSibling; $sibling; $sibling = $sibling->previousSibling) {
+                 if ($sibling instanceof DOMElement && $sibling->tagName == $current->tagName)
+                     $position++;
+             }
+             if ($this->isXML())
+                 $segments[] = "*[local-name()='{$current->tagName}'][{$position}]";
+             else
+                 $segments[] = "{$current->tagName}[{$position}]";
+             $current = $current->parentNode;
+         }
+         // Steps were gathered leaf-first; flip them to get root-first order.
+         $paths[] = '/'.join('/', array_reverse($segments));
+     }
+     if ($oneNode)
+         return $paths[0];
+     return $paths;
+ }
+ // HELPERS
+ /**
+  * Produces short, selector-like descriptions of nodes for debugging,
+  * e.g. tag#id.class1.class2[name="x"][value="y"][selected][checked].
+  * Non-empty text nodes are described as 'Text:' plus their first 15
+  * characters; other nodes are skipped.
+  *
+  * @param DOMNode|null $oneNode Node to describe; the whole stack when omitted.
+  * @return string|array A single description when $oneNode is given and
+  *         could be described, otherwise the list of descriptions.
+  */
+ public function whois($oneNode = null) {
+     $return = array();
+     $loop = $oneNode
+         ? array( $oneNode )
+         : $this->elements;
+     foreach($loop as $node) {
+         if (isset($node->tagName)) {
+             $tag = in_array($node->tagName, array('php', 'js'))
+                 ? strtoupper($node->tagName)
+                 : $node->tagName;
+             $return[] = $tag
+                 .($node->getAttribute('id')
+                     ? '#'.$node->getAttribute('id'):'')
+                 // explode() replaces split(), which was removed in PHP 7;
+                 // for a literal single-space separator both behave alike.
+                 .($node->getAttribute('class')
+                     ? '.'.join('.', explode(' ', $node->getAttribute('class'))):'')
+                 .($node->getAttribute('name')
+                     ? '[name="'.$node->getAttribute('name').'"]':'')
+                 // Plain values are shortened; values holding PHP code are only flagged.
+                 .($node->getAttribute('value') && strpos($node->getAttribute('value'), '<'.'?php') === false
+                     ? '[value="'.substr(str_replace("\n", '', $node->getAttribute('value')), 0, 15).'"]':'')
+                 .($node->getAttribute('value') && strpos($node->getAttribute('value'), '<'.'?php') !== false
+                     ? '[value=PHP]':'')
+                 .($node->getAttribute('selected')
+                     ? '[selected]':'')
+                 .($node->getAttribute('checked')
+                     ? '[checked]':'')
+             ;
+         } else if ($node instanceof DOMText) {
+             if (trim($node->textContent))
+                 $return[] = 'Text:'.substr(str_replace("\n", ' ', $node->textContent), 0, 15);
+         }
+     }
+     return $oneNode && isset($return[0])
+         ? $return[0]
+         : $return;
+ }
+ /**
+  * Dumps htmlOuter() and preserves the chain. Useful for debugging.
+  *
+  * Debug output is switched off while dumping and restored afterwards,
+  * as dumpWhois() and dumpLength() do.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function dump() {
+     print 'DUMP #'.(phpQuery::$dumpCount++).' ';
+     $debug = phpQuery::$debug;
+     phpQuery::$debug = false;
+     var_dump($this->htmlOuter());
+     // Without this the global debug flag stayed off after the first dump().
+     phpQuery::$debug = $debug;
+     return $this;
+ }
+ /**
+  * Dumps the whois() descriptions of the stack and preserves the chain.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function dumpWhois() {
+     $previousDebug = phpQuery::$debug;
+     print 'DUMP #'.(phpQuery::$dumpCount++).' ';
+     // Silence debug messages for the duration of the dump.
+     phpQuery::$debug = false;
+     $descriptions = $this->whois();
+     var_dump('whois', $descriptions);
+     phpQuery::$debug = $previousDebug;
+     return $this;
+ }
+ /**
+  * Dumps the result of length() and preserves the chain.
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function dumpLength() {
+     $previousDebug = phpQuery::$debug;
+     print 'DUMP #'.(phpQuery::$dumpCount++).' ';
+     // Silence debug messages for the duration of the dump.
+     phpQuery::$debug = false;
+     $length = $this->length();
+     var_dump('length', $length);
+     phpQuery::$debug = $previousDebug;
+     return $this;
+ }
+ /**
+  * Prints an indented tree of whois() descriptions for every node on the
+  * stack and preserves the chain.
+  *
+  * @param bool $html  When true the output is prepared for a browser: spaces
+  *                    become &nbsp; and newlines become <br> tags.
+  * @param bool $title When true a "DUMP #n" header line is printed first.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public function dumpTree($html = true, $title = true) {
+     $output = $title
+         ? 'DUMP #'.(phpQuery::$dumpCount++)." \n" : '';
+     $debug = phpQuery::$debug;
+     phpQuery::$debug = false;
+     foreach($this->stack() as $node)
+         $output .= $this->__dumpTree($node);
+     phpQuery::$debug = $debug;
+     // Browsers collapse runs of plain spaces, which would flatten the tree
+     // indentation; the previous space-for-space replacement did nothing.
+     print $html
+         ? nl2br(str_replace(' ', '&nbsp;', $output))
+         : $output;
+     return $this;
+ }
+ /**
+  * Recursively renders one node and its children for dumpTree().
+  *
+  * @param DOMNode $node
+  * @param int     $intend Nesting depth; one ' - ' marker is printed per level.
+  * @return string
+  */
+ private function __dumpTree($node, $intend = 0) {
+     $lines = '';
+     $description = $this->whois($node);
+     // Nodes without a description are skipped, but their children still count.
+     if ($description)
+         $lines .= str_repeat(' - ', $intend).$description."\n";
+     if (! isset($node->childNodes))
+         return $lines;
+     foreach ($node->childNodes as $child)
+         $lines .= $this->__dumpTree($child, $intend+1);
+     return $lines;
+ }
+ /**
+  * Prints this file's name and line, dumps htmlOuter() and stops the
+  * script. Useful for debugging.
+  */
+ public function dumpDie() {
+     print __FILE__.':'.__LINE__;
+     $markup = $this->htmlOuter();
+     var_dump($markup);
+     die();
+ }
+}
+
+
+// -- Multibyte Compatibility functions ---------------------------------------
+// http://svn.iphonewebdev.com/lace/lib/mb_compat.php
+
+/**
+ * mb_internal_encoding()
+ *
+ * Stand-in used when the mbstring extension is missing: accepts the
+ * encoding, ignores it and reports success.
+ */
+if (! function_exists('mb_internal_encoding')) {
+    function mb_internal_encoding($enc)
+    {
+        return true;
+    }
+}
+
+/**
+ * mb_regex_encoding()
+ *
+ * Stand-in used when the mbstring extension is missing: accepts the
+ * encoding, ignores it and reports success.
+ */
+if (! function_exists('mb_regex_encoding')) {
+    function mb_regex_encoding($enc)
+    {
+        return true;
+    }
+}
+
+/**
+ * mb_strlen()
+ *
+ * Stand-in used when the mbstring extension is missing. Delegates to
+ * strlen(), so the result is a byte count.
+ */
+if (! function_exists('mb_strlen')) {
+    function mb_strlen($str)
+    {
+        $byteCount = strlen($str);
+        return $byteCount;
+    }
+}
+
+/**
+ * mb_strpos()
+ *
+ * Stand-in used when the mbstring extension is missing. Delegates to
+ * strpos(), so offsets are byte offsets.
+ */
+if (! function_exists('mb_strpos')) {
+    function mb_strpos($haystack, $needle, $offset=0)
+    {
+        $position = strpos($haystack, $needle, $offset);
+        return $position;
+    }
+}
+/**
+ * mb_stripos()
+ *
+ * Stand-in used when the mbstring extension is missing. Delegates to
+ * stripos(), so offsets are byte offsets.
+ */
+if (! function_exists('mb_stripos')) {
+    function mb_stripos($haystack, $needle, $offset=0)
+    {
+        $position = stripos($haystack, $needle, $offset);
+        return $position;
+    }
+}
+
+/**
+ * mb_substr()
+ *
+ * Stand-in used when the mbstring extension is missing. Delegates to
+ * substr(), so positions are byte positions.
+ *
+ * When $length is omitted the rest of the string is returned, as the real
+ * mb_substr() does. The earlier default of 0 made every two-argument call
+ * return an empty string.
+ */
+if (!function_exists('mb_substr'))
+{
+    function mb_substr($str, $start, $length = null)
+    {
+        if (is_null($length))
+            return substr($str, $start);
+        return substr($str, $start, $length);
+    }
+}
+
+/**
+ * mb_substr_count()
+ *
+ * Stand-in used when the mbstring extension is missing. Delegates to
+ * substr_count().
+ */
+if (! function_exists('mb_substr_count')) {
+    function mb_substr_count($haystack, $needle)
+    {
+        $occurrences = substr_count($haystack, $needle);
+        return $occurrences;
+    }
+}
+
+
+/**
+ * Static namespace for phpQuery functions.
+ *
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ */
+abstract class phpQuery {
+ /**
+ * XXX: Workaround for mbstring problems
+ *
+ * @var bool
+ */
+ public static $mbstringSupport = true;
+ public static $debug = false;
+ public static $documents = array();
+ public static $defaultDocumentID = null;
+// public static $defaultDoctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"';
+ /**
+ * Applies only to HTML.
+ *
+ * @var string
+ */
+ public static $defaultDoctype = '';
+ public static $defaultCharset = 'UTF-8';
+ /**
+ * Static namespace for plugins.
+ *
+ * @var object
+ */
+ public static $plugins = array();
+ /**
+ * List of loaded plugins.
+ *
+ * @var array
+ */
+ public static $pluginsLoaded = array();
+ public static $pluginsMethods = array();
+ public static $pluginsStaticMethods = array();
+ public static $extendMethods = array();
+ /**
+ * @TODO implement
+ */
+ public static $extendStaticMethods = array();
+ /**
+ * Hosts allowed for AJAX connections.
+ * Dot '.' means $_SERVER['HTTP_HOST'] (if any).
+ *
+ * @var array
+ */
+ public static $ajaxAllowedHosts = array(
+ '.'
+ );
+ /**
+ * AJAX settings.
+ *
+ * @var array
+ * XXX should it be static or not ?
+ */
+ public static $ajaxSettings = array(
+ 'url' => '',//TODO
+ 'global' => true,
+ 'type' => "GET",
+ 'timeout' => null,
+ 'contentType' => "application/x-www-form-urlencoded",
+ 'processData' => true,
+// 'async' => true,
+ 'data' => null,
+ 'username' => null,
+ 'password' => null,
+ 'accepts' => array(
+ 'xml' => "application/xml, text/xml",
+ 'html' => "text/html",
+ 'script' => "text/javascript, application/javascript",
+ 'json' => "application/json, text/javascript",
+ 'text' => "text/plain",
+ '_default' => "*/*"
+ )
+ );
+ public static $lastModified = null;
+ public static $active = 0;
+ public static $dumpCount = 0;
+ /**
+  * Multi-purpose function.
+  * Use pq() as shortcut.
+  *
+  * In the examples below, $pq is any result of pq().
+  *
+  * 1. Import markup into an existing document (without attaching it):
+  *    - into the selected document:
+  *        pq('<div/>') // DOESN'T accept text nodes at the beginning of the input string!
+  *    - into the document with ID from $pq->getDocumentID():
+  *        pq('<div/>', $pq->getDocumentID())
+  *    - into the same document a DOMNode belongs to:
+  *        pq('<div/>', DOMNode)
+  *    - into the document of a phpQuery object:
+  *        pq('<div/>', $pq)
+  *
+  * 2. Run a query:
+  *    - on the last selected document:
+  *        pq('div.myClass')
+  *    - on the document with ID from $pq->getDocumentID():
+  *        pq('div.myClass', $pq->getDocumentID())
+  *    - on the document a DOMNode belongs to, using the node as query root:
+  *        pq('div.myClass', DOMNode)
+  *    - on the document of a phpQuery object, using its stack as query root:
+  *        pq('div.myClass', $pq)
+  *
+  * 3. Wrap existing nodes (imported into the target document when they
+  *    belong to another one):
+  *        pq($domNode), pq(array($domNode1, $domNode2)), pq($domNodeList)
+  *
+  * @param string|phpQueryObject|DOMNode|DOMNodeList|array $arg1 HTML markup, CSS selector, phpQuery object, DOMNode, DOMNodeList or array of DOMNodes
+  * @param string|phpQueryObject|DOMNode $context DOM ID from $pq->getDocumentID(), phpQuery object (determines also query root) or DOMNode (determines also query root)
+  *
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery|QueryTemplatesPhpQuery|false
+  * phpQuery object or false in case of error.
+  * @throws Exception When no document is available or $context is an orphaned DOMNode.
+  */
+ public static function pq($arg1, $context = null) {
+     // A bare DOM node carries its own context: the registered document owning it.
+     if ($arg1 instanceof DOMNode && ! isset($context)) {
+         $ownerDocument = $arg1 instanceof DOMDocument
+             ? $arg1 : $arg1->ownerDocument;
+         foreach(phpQuery::$documents as $documentWrapper) {
+             if ($documentWrapper->document->isSameNode($ownerDocument))
+                 $context = $documentWrapper->id;
+         }
+     }
+     // Resolve the ID of the document to work on.
+     if (! $context) {
+         $domId = self::$defaultDocumentID;
+         if (! $domId)
+             throw new Exception("Can't use last created DOM, because there isn't any. Use phpQuery::newDocument() first.");
+     } else if (is_object($context) && $context instanceof phpQueryObject)
+         $domId = $context->getDocumentID();
+     else if ($context instanceof DOMDocument) {
+         $domId = self::getDocumentID($context);
+         if (! $domId) {
+             // Unknown DOMDocument: register it as a new document.
+             $domId = self::newDocument($context)->getDocumentID();
+         }
+     } else if ($context instanceof DOMNode) {
+         $domId = self::getDocumentID($context);
+         if (! $domId) {
+             throw new Exception('Orphaned DOMNode');
+         }
+     } else
+         $domId = $context;
+     if ($arg1 instanceof phpQueryObject) {
+         /**
+          * Return $arg1 or import $arg1 stack if document differs:
+          * pq(pq('<div/>'))
+          */
+         if ($arg1->getDocumentID() == $domId)
+             return $arg1;
+         $class = get_class($arg1);
+         // Support inheritance by passing the old object to an overloaded
+         // constructor. The base class is built from the document ID alone;
+         // the former comparison against 'phpQuery' (an abstract class, so
+         // never the class of an instance) could not select that path.
+         $phpQuery = $class != 'phpQueryObject'
+             ? new $class($arg1, $domId)
+             : new phpQueryObject($domId);
+         $phpQuery->elements = array();
+         foreach($arg1->elements as $node)
+             $phpQuery->elements[] = $phpQuery->document->importNode($node, true);
+         return $phpQuery;
+     } else if ($arg1 instanceof DOMNode
+         || $arg1 instanceof DOMNodeList
+         || (is_array($arg1) && isset($arg1[0]) && $arg1[0] instanceof DOMNode)) {
+         /*
+          * Wrap DOM nodes with phpQuery object, import into document when needed:
+          * pq(array($domNode1, $domNode2))
+          * A DOMNodeList is neither a DOMNode nor an array, so it needs its
+          * own test to reach this branch.
+          */
+         $phpQuery = new phpQueryObject($domId);
+         if (!($arg1 instanceof DOMNodeList) && ! is_array($arg1))
+             $arg1 = array($arg1);
+         $phpQuery->elements = array();
+         foreach($arg1 as $node) {
+             // Nodes owned by another document must be imported first.
+             $needsImport = $node->ownerDocument instanceof DOMDocument
+                 && ! $node->ownerDocument->isSameNode($phpQuery->document);
+             $phpQuery->elements[] = $needsImport
+                 ? $phpQuery->document->importNode($node, true)
+                 : $node;
+         }
+         return $phpQuery;
+     } else if (self::isMarkup($arg1)) {
+         /**
+          * Import HTML:
+          * pq('<div/>')
+          */
+         $phpQuery = new phpQueryObject($domId);
+         return $phpQuery->newInstance(
+             $phpQuery->documentWrapper->import($arg1)
+         );
+     } else {
+         /**
+          * Run CSS query:
+          * pq('div.myClass')
+          */
+         $phpQuery = new phpQueryObject($domId);
+         // The context, when it carries nodes, becomes the root of the query.
+         if ($context && $context instanceof phpQueryObject)
+             $phpQuery->elements = $context->elements;
+         else if ($context && $context instanceof DOMNodeList) {
+             $phpQuery->elements = array();
+             foreach($context as $node)
+                 $phpQuery->elements[] = $node;
+         } else if ($context && $context instanceof DOMNode)
+             $phpQuery->elements = array($context);
+         return $phpQuery->find($arg1);
+     }
+ }
+ /**
+  * Sets the default document to $id. The document has to be loaded prior
+  * to using this method.
+  * $id can be retrieved via getDocumentID() or getDocumentIDRef().
+  *
+  * @param mixed $id Anything self::getDocumentID() can resolve.
+  */
+ public static function selectDocument($id) {
+     $resolvedId = self::getDocumentID($id);
+     self::debug("Selecting document '$resolvedId' as default one");
+     // The resolved ID is resolved once more before being stored.
+     self::$defaultDocumentID = self::getDocumentID($resolvedId);
+ }
+ /**
+  * Returns the document with id $id, or the default one, as a
+  * phpQueryObject. A given $id also becomes the default document.
+  * $id can be retrieved via getDocumentID() or getDocumentIDRef().
+  * Chainable.
+  *
+  * @see phpQuery::selectDocument()
+  * @param mixed $id
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public static function getDocument($id = null) {
+     if (! $id) {
+         $id = phpQuery::$defaultDocumentID;
+     } else {
+         phpQuery::selectDocument($id);
+     }
+     return new phpQueryObject($id);
+ }
+ /**
+  * Creates a new document from markup.
+  * Chainable.
+  *
+  * @param mixed       $markup      Passed to createDocumentWrapper(); a falsy
+  *                                 value is replaced with an empty string.
+  * @param string|null $contentType
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public static function newDocument($markup = null, $contentType = null) {
+     $source = $markup ? $markup : '';
+     $documentID = phpQuery::createDocumentWrapper($source, $contentType);
+     return new phpQueryObject($documentID);
+ }
+ /**
+  * Creates a new document from markup with content type "text/html".
+  * Chainable.
+  *
+  * @param mixed       $markup
+  * @param string|null $charset Appended as ";charset=..." when given.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public static function newDocumentHTML($markup = null, $charset = null) {
+     $mime = "text/html";
+     if ($charset)
+         $mime .= ";charset=$charset";
+     return self::newDocument($markup, $mime);
+ }
+ /**
+  * Creates a new document from markup with content type "text/xml".
+  * Chainable.
+  *
+  * @param mixed       $markup
+  * @param string|null $charset Appended as ";charset=..." when given.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public static function newDocumentXML($markup = null, $charset = null) {
+     $mime = "text/xml";
+     if ($charset)
+         $mime .= ";charset=$charset";
+     return self::newDocument($markup, $mime);
+ }
+ /**
+  * Creates a new document from markup with content type
+  * "application/xhtml+xml".
+  * Chainable.
+  *
+  * @param mixed       $markup
+  * @param string|null $charset Appended as ";charset=..." when given.
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public static function newDocumentXHTML($markup = null, $charset = null) {
+     $mime = "application/xhtml+xml";
+     if ($charset)
+         $mime .= ";charset=$charset";
+     return self::newDocument($markup, $mime);
+ }
+ /**
+  * Creates a new document from markup containing PHP code; the markup is
+  * run through phpToMarkup() first.
+  * Chainable.
+  *
+  * @param string $markup
+  * @param string $contentType
+  * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+  */
+ public static function newDocumentPHP($markup = null, $contentType = "text/html") {
+     // TODO pass charset to phpToMarkup if possible (use DOMDocumentWrapper function)
+     $converted = phpQuery::phpToMarkup($markup, self::$defaultCharset);
+     return self::newDocument($converted, $contentType);
+ }
+ public static function phpToMarkup($php, $charset = 'utf-8') {
+     // Converts PHP tags in $php into markup a DOM parser can carry.
+     // $charset is accepted for interface compatibility; the attribute
+     // callback is invoked by preg_replace_callback() with the match array
+     // only, so it always runs with its own default charset.
+     //
+     // Pass 1: PHP tags inside quoted attribute values (single, then double
+     // quotes) are entity-encoded in place by _phpToMarkupCallback().
+     $regexes = array(
+         '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(\')([^\']*)<'.'?php?(.*?)(?:\\?>)([^\']*)\'@s',
+         '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(")([^"]*)<'.'?php?(.*?)(?:\\?>)([^"]*)"@s',
+     );
+     foreach($regexes as $regex) {
+         // Repeat until no attribute contains a raw PHP tag any more.
+         while (preg_match($regex, $php)) {
+             $php = preg_replace_callback(
+                 $regex,
+                 array('phpQuery', '_phpToMarkupCallback'),
+                 $php
+             );
+         }
+     }
+     // Pass 2: PHP tags in element content. Group 3 is the code between the
+     // tags; it is kept inside an HTML comment within a <php> element.
+     // NOTE(review): this copy replaced the match with '\\1' only, which
+     // deleted the PHP code; the wrapper is restored here - confirm against
+     // upstream phpQuery.
+     $regex = '@(^|>[^<]*)+?(<\?php(.*?)(\?>))@s';
+     return preg_replace($regex, '\\1<php><!-- \\3 --></php>', $php);
+ }
+ public static function _phpToMarkupCallback($php, $charset = 'utf-8') {
+     // preg_replace_callback() handler for phpToMarkup(). $php is the match
+     // array: [1] tag text up to "attr=", [2] the quote character, [3]
+     // attribute text before the PHP tag, [4] the PHP code, [5] attribute
+     // text after the PHP tag.
+     // The body used to read an undefined $m, so every match collapsed to
+     // an encoded empty PHP tag.
+     $m = $php;
+     // [3] is kept so text (and already-encoded PHP tags) preceding the
+     // matched tag inside the same attribute is not lost.
+     return $m[1].$m[2].$m[3]
+         .htmlspecialchars("<"."?php".$m[4]."?".">", ENT_QUOTES, $charset)
+         .$m[5].$m[2];
+ }
+ public static function _markupToPHPCallback($m) {
+     // $m[1] is the entity-encoded PHP source captured by the caller's pattern.
+     $code = htmlspecialchars_decode($m[1]);
+     return "<"."?php ".$code." ?".">";
+ }
+ /**
+ * Converts document markup containing PHP code generated by phpQuery::php()
+ * into valid (executable) PHP code syntax.
+ *
+ * @param string|phpQueryObject $content
+ * @return string PHP code.
+ */
+ public static function markupToPHP($content) {
+     // Accepts a markup string or a phpQueryObject (its outer markup is used).
+     if ($content instanceof phpQueryObject)
+         $content = $content->markupOuter();
+     // Step 1: <php><!-- code --></php> wrappers become real PHP tags.
+     // NOTE(review): in this copy the pattern had been reduced to
+     // '@\s*\s*@s', which has no capture group although the callback reads
+     // $m[1]; restored here - confirm against upstream phpQuery.
+     $content = preg_replace_callback(
+         '@<php>\s*<!--(.*?)-->\s*</php>@s',
+         array('phpQuery', '_markupToPHPCallback'),
+         $content
+     );
+     // Step 2: PHP tags stored entity- or URL-encoded inside quoted
+     // attribute values (single, then double quotes). The opening tag must
+     // be matched in its encoded form only: matching a raw "<" would also
+     // match this step's own output and never terminate.
+     $regexes = array(
+         '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(\')([^\']*)(?:&lt;|%3C)\\?(?:php)?(.*?)(?:\\?(?:&gt;|%3E))([^\']*)\'@s',
+         '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(")([^"]*)(?:&lt;|%3C)\\?(?:php)?(.*?)(?:\\?(?:&gt;|%3E))([^"]*)"@s',
+     );
+     foreach($regexes as $regex) {
+         while (preg_match($regex, $content)) {
+             $content = preg_replace_callback(
+                 $regex,
+                 array('phpQuery', '_markupToPHPAttributeCallback'),
+                 $content
+             );
+         }
+     }
+     return $content;
+ }
+ /**
+  * preg_replace_callback() handler for markupToPHP(): rebuilds one
+  * attribute value, turning its encoded PHP tag back into a real one.
+  * Replaces the former create_function() closure (removed in PHP 8.0).
+  *
+  * @param array $m [1] tag text up to "attr=", [2] quote, [3] text before
+  *                 the PHP tag, [4] encoded code, [5] text after the tag.
+  * @return string
+  */
+ public static function _markupToPHPAttributeCallback($m) {
+     // NOTE(review): the search/replace lists were partly lost in this copy
+     // and are rebuilt from the surviving replacement values - confirm
+     // against upstream phpQuery.
+     $code = str_replace(
+         array("%20", "%3E", "%09", "&#10;", "&#9;", "%7B", "%24", "%7D", "%22", "%5B", "%5D"),
+         array(" ", ">", "\t", "\n", "\t", "{", '$', "}", '"', "[", "]"),
+         htmlspecialchars_decode($m[4])
+     );
+     return $m[1].$m[2].$m[3]."<"."?php ".$code." ?".">".$m[5].$m[2];
+ }
+
+
+ public static function loadDocumentHTML($html)
+ {
+     // $html is markup, not a path: the third argument tells
+     // newDocumentFile() to use it directly instead of reading a file.
+     // The created document object is now returned (it used to be
+     // discarded); callers that ignore the result are unaffected.
+     return self::newDocumentFile($html, null, true);
+ }
+
+ /**
+ * Creates new document from file $file.
+ * Chainable.
+ *
+ * @param string $file URLs allowed. See File wrapper page at php.net for more supported sources.
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public static function newDocumentFile($file, $contentType = null, $is_html = false) {
+     // With $is_html set, $file already holds the markup; otherwise it is
+     // a path or URL whose contents are read first.
+     $markup = $is_html ? $file : file_get_contents($file);
+     return new phpQueryObject(
+         self::createDocumentWrapper($markup, $contentType)
+     );
+ }
+ /**
+ * Creates new HTML document from file $file.
+ * Chainable.
+ *
+ * @param string $file
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public static function newDocumentFileHTML($file, $charset = null) {
+     // The charset parameter is appended to the MIME type only when supplied.
+     $mime = 'text/html';
+     if ($charset) {
+         $mime .= ";charset=$charset";
+     }
+     return self::newDocumentFile($file, $mime);
+ }
+ /**
+ * Creates new XML document from file $file.
+ * Chainable.
+ *
+ * @param string $file
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public static function newDocumentFileXML($file, $charset = null) {
+     // The charset parameter is appended to the MIME type only when supplied.
+     $mime = 'text/xml';
+     if ($charset) {
+         $mime .= ";charset=$charset";
+     }
+     return self::newDocumentFile($file, $mime);
+ }
+ /**
+ * Creates new XHTML document from file $file.
+ * Chainable.
+ *
+ * @param string $file
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public static function newDocumentFileXHTML($file, $charset = null) {
+     // The charset parameter is appended to the MIME type only when supplied.
+     $mime = 'application/xhtml+xml';
+     if ($charset) {
+         $mime .= ";charset=$charset";
+     }
+     return self::newDocumentFile($file, $mime);
+ }
+ /**
+ * Creates new document from the PHP file $file.
+ * Chainable.
+ *
+ * @param string $file
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ */
+ public static function newDocumentFilePHP($file, $contentType = null) {
+     // Read the file, then hand its contents to the PHP-aware constructor.
+     $source = file_get_contents($file);
+     return self::newDocumentPHP($source, $contentType);
+ }
+ /**
+ * Reuses existing DOMDocument object.
+ * Chainable.
+ *
+ * @param $document DOMDocument
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @TODO support DOMDocument
+ */
+ public static function loadDocument($document) {
+ // Not implemented: calling this terminates the script with the message
+ // below. $document is currently unused.
+ // TODO
+ die('TODO loadDocument');
+ }
+ /**
+ * Wraps markup in a DOMDocumentWrapper, registers it in phpQuery::$documents and selects it.
+ *
+ * @param unknown_type $html
+ * @param unknown_type $domId
+ * @return unknown New DOM ID
+ * @todo support PHP tags in input
+ * @todo support passing DOMDocument object from self::loadDocument
+ */
+ protected static function createDocumentWrapper($html, $contentType = null, $documentID = null) {
+     // Builds a DOMDocumentWrapper for $html (markup string or DOMDocument),
+     // stores it in phpQuery::$documents under the wrapper's id, selects it
+     // as the current document and returns that id.
+     // Throws Exception when the PHP4 "DOM XML" extension is loaded.
+     if (function_exists('domxml_open_mem'))
+         throw new Exception("Old PHP4 DOM XML extension detected. phpQuery won't work until this extension is disabled.");
+     if ($html instanceof DOMDocument && self::getDocumentID($html)) {
+         // Document already exists in phpQuery::$documents: wrap a copy.
+         // The previous code cloned it into an unused variable and then
+         // read an undefined $wrapper.
+         // NOTE(review): relies on clone producing an independent
+         // DOMDocument - confirm.
+         $html = clone $html;
+     }
+     $wrapper = new DOMDocumentWrapper($html, $contentType, $documentID);
+     // bind document
+     phpQuery::$documents[$wrapper->id] = $wrapper;
+     // remember last loaded document
+     phpQuery::selectDocument($wrapper->id);
+     return $wrapper->id;
+ }
+ /**
+ * Extend class namespace.
+ *
+ * @param string|array $target
+ * @param array $source
+ * @TODO support string $source
+ * @return unknown_type
+ */
+ public static function extend($target, $source) {
+     // Registers callbacks as extra methods of 'phpQueryObject' or
+     // 'phpQuery'. $source maps method name => callback; a plain string is
+     // treated as array($source => $source). Names already taken by an
+     // earlier extension or by a plugin are skipped with a debug message.
+     // Returns true; throws Exception for any other $target.
+     switch($target) {
+         case 'phpQueryObject':
+             $targetRef = &self::$extendMethods;
+             $targetRef2 = &self::$pluginsMethods;
+             break;
+         case 'phpQuery':
+             $targetRef = &self::$extendStaticMethods;
+             $targetRef2 = &self::$pluginsStaticMethods;
+             break;
+         default:
+             throw new Exception("Unsupported \$target type");
+     }
+     if (is_string($source))
+         $source = array($source => $source);
+     foreach($source as $method => $callback) {
+         if (isset($targetRef[$method])) {
+             // "\'" inside a double-quoted string printed a stray backslash.
+             self::debug("Duplicate method '{$method}', can't extend '{$target}'");
+             continue;
+         }
+         if (isset($targetRef2[$method])) {
+             self::debug("Duplicate method '{$method}' from plugin '{$targetRef2[$method]}',"
+                 ." can't extend '{$target}'");
+             continue;
+         }
+         $targetRef[$method] = $callback;
+     }
+     return true;
+ }
+ /**
+ * Extend phpQuery with $class from $file.
+ *
+ * @param string $class Extending class name. Real class name can be prepended phpQuery_.
+ * @param string $file Filename to include. Defaults to "{$class}.php".
+ */
+ public static function plugin($class, $file = null) {
+     // Loads plugin $class once and records which plugin provides each
+     // method: static ones from phpQueryPlugin_$class, object ones from
+     // phpQueryObjectPlugin_$class. Returns true; throws Exception when a
+     // method name is already provided by another plugin.
+     // TODO $class checked against phpQuery_$class
+     if (in_array($class, self::$pluginsLoaded))
+         return true;
+     if (! $file)
+         $file = $class.'.php';
+     $objectClassExists = class_exists('phpQueryObjectPlugin_'.$class);
+     $staticClassExists = class_exists('phpQueryPlugin_'.$class);
+     // Include the plugin file only when neither class is defined yet.
+     if (! $objectClassExists && ! $staticClassExists)
+         require_once($file);
+     self::$pluginsLoaded[] = $class;
+     // static methods
+     if (class_exists('phpQueryPlugin_'.$class)) {
+         $realClass = 'phpQueryPlugin_'.$class;
+         $vars = get_class_vars($realClass);
+         // A plugin may list its methods in $phpQueryMethods; otherwise
+         // every method of the class is considered.
+         $loop = isset($vars['phpQueryMethods'])
+             ? $vars['phpQueryMethods']
+             : get_class_methods($realClass);
+         foreach($loop as $method) {
+             if ($method == '__initialize')
+                 continue;
+             if (! is_callable(array($realClass, $method)))
+                 continue;
+             if (isset(self::$pluginsStaticMethods[$method])) {
+                 // The message used an undefined $c for the plugin name.
+                 throw new Exception("Duplicate method '{$method}' from plugin '{$class}' conflicts with same method from plugin '".self::$pluginsStaticMethods[$method]."'");
+             }
+             self::$pluginsStaticMethods[$method] = $class;
+         }
+         if (method_exists($realClass, '__initialize'))
+             call_user_func_array(array($realClass, '__initialize'), array());
+     }
+     // object methods
+     if (class_exists('phpQueryObjectPlugin_'.$class)) {
+         $realClass = 'phpQueryObjectPlugin_'.$class;
+         $vars = get_class_vars($realClass);
+         $loop = isset($vars['phpQueryMethods'])
+             ? $vars['phpQueryMethods']
+             : get_class_methods($realClass);
+         foreach($loop as $method) {
+             if (! is_callable(array($realClass, $method)))
+                 continue;
+             if (isset(self::$pluginsMethods[$method])) {
+                 throw new Exception("Duplicate method '{$method}' from plugin '{$class}' conflicts with same method from plugin '".self::$pluginsMethods[$method]."'");
+             }
+             self::$pluginsMethods[$method] = $class;
+         }
+     }
+     return true;
+ }
+ /**
+ * Unloads all documents, or only the specified one, from memory.
+ *
+ * @param mixed $documentID @see phpQuery::getDocumentID() for supported types.
+ */
+ public static function unloadDocuments($id = null) {
+     // No ID given: drop every registered document.
+     if (! isset($id)) {
+         foreach(array_keys(phpQuery::$documents) as $key)
+             unset(phpQuery::$documents[$key]);
+         return;
+     }
+     // Otherwise resolve the argument to a document ID and drop that one.
+     $resolved = self::getDocumentID($id);
+     if ($resolved)
+         unset(phpQuery::$documents[$resolved]);
+ }
+ /**
+ * Parses phpQuery object or HTML result against PHP tags and makes them active.
+ *
+ * @param phpQuery|string $content
+ * @deprecated
+ * @return string
+ */
+ public static function unsafePHPTags($content) {
+ // Deprecated alias: forwards to markupToPHP() unchanged.
+ return self::markupToPHP($content);
+ }
+ public static function DOMNodeListToArray($DOMNodeList) {
+     // Copies the items of an iterable node list into a plain 0-indexed
+     // array; a falsy argument yields an empty array.
+     $nodes = array();
+     if ($DOMNodeList) {
+         foreach($DOMNodeList as $item) {
+             $nodes[] = $item;
+         }
+     }
+     return $nodes;
+ }
+ /**
+ * Checks if $input is HTML string, which has to start with '<'.
+ *
+ * @deprecated
+ * @param String $input
+ * @return Bool
+ * @todo still used ?
+ */
+ public static function isMarkup($input) {
+     // Arrays are never markup.
+     if (is_array($input))
+         return false;
+     // Markup is recognised by a leading '<' after trimming whitespace.
+     $trimmed = trim($input);
+     return substr($trimmed, 0, 1) == '<';
+ }
+ public static function debug($text) {
+     // Silent unless the class-level debug flag is on.
+     if (! self::$debug)
+         return;
+     print var_dump($text);
+ }
+ /**
+ * Make an AJAX request.
+ *
+ * @param array See $options http://docs.jquery.com/Ajax/jQuery.ajax#toptions
+ * Additional options are:
+ * 'document' - document for global events, @see phpQuery::getDocumentID()
+ * 'referer' - implemented
+ * 'requested_with' - TODO; not implemented (X-Requested-With)
+ * @return Zend_Http_Client
+ * @link http://docs.jquery.com/Ajax/jQuery.ajax
+ *
+ * @TODO $options['cache']
+ * @TODO $options['processData']
+ * @TODO $options['xhr']
+ * @TODO $options['data'] as string
+ * @TODO XHR interface
+ */
+ public static function ajax($options = array(), $xhr = null) {
+     // Performs one HTTP request through Zend_Http_Client using
+     // jQuery.ajax()-style options layered over self::$ajaxSettings, runs
+     // the beforeSend/success/error/complete callbacks, fires the global
+     // ajax* events and returns the client. Throws Exception when the
+     // target host is not listed in phpQuery::$ajaxAllowedHosts.
+     $options = array_merge(
+         self::$ajaxSettings, $options
+     );
+     // These optional settings may be missing from both the defaults and
+     // the caller's array; default them to null so the reads below never
+     // hit an undefined index.
+     $options += array(
+         'data' => null,
+         'dataType' => null,
+         'jsonp' => null,
+         'username' => null,
+         'password' => null,
+         'global' => null,
+         'contentType' => null,
+     );
+     $documentID = isset($options['document'])
+         ? self::getDocumentID($options['document'])
+         : null;
+     if ($xhr) {
+         // reuse existing XHR object, but clean it up
+         $client = $xhr;
+         $client->setAuth(false);
+         $client->setHeaders("If-Modified-Since", null);
+         $client->setHeaders("Referer", null);
+         $client->resetParameters();
+     } else {
+         // create new XHR object
+         require_once('Zend/Http/Client.php');
+         $client = new Zend_Http_Client();
+         $client->setCookieJar();
+     }
+     if (isset($options['timeout']))
+         $client->setConfig(array(
+             'timeout' => $options['timeout'],
+         ));
+     // A "." entry in the allow-list stands for the host of the current request.
+     foreach(self::$ajaxAllowedHosts as $k => $host)
+         if ($host == '.' && isset($_SERVER['HTTP_HOST']))
+             self::$ajaxAllowedHosts[$k] = $_SERVER['HTTP_HOST'];
+     $host = parse_url($options['url'], PHP_URL_HOST);
+     // Strict match: a URL without a parsable host (null/false) must never
+     // compare equal to an allow-list entry through type juggling.
+     if (! is_string($host) || ! in_array($host, self::$ajaxAllowedHosts, true)) {
+         throw new Exception("Request not permitted, host '$host' not present in "
+             ."phpQuery::\$ajaxAllowedHosts");
+     }
+     // TODO $options['processData']
+     // Convert form data to a plain array first: the JSONP/JSON handling
+     // below iterates and writes $options['data'] as an array.
+     if ($options['data'] instanceof phpQueryObject) {
+         $serialized = $options['data']->serializeArray($options['data']);
+         $options['data'] = array();
+         foreach($serialized as $r)
+             $options['data'][ $r['name'] ] = $r['value'];
+     }
+     // JSONP
+     $jsre = "/=\\?(&|$)/";
+     if (isset($options['dataType']) && $options['dataType'] == 'jsonp') {
+         $jsonpCallbackParam = $options['jsonp']
+             ? $options['jsonp'] : 'callback';
+         if (strtolower($options['type']) == 'get') {
+             if (! preg_match($jsre, $options['url'])) {
+                 // strpos() returns 0 for a match at offset 0, so compare
+                 // against false rather than testing truthiness.
+                 $sep = strpos($options['url'], '?') !== false
+                     ? '&' : '?';
+                 $options['url'] .= "$sep$jsonpCallbackParam=?";
+             }
+         } else if ($options['data']) {
+             $jsonp = false;
+             foreach($options['data'] as $n => $v) {
+                 // Strict: a numeric 0 must not be taken for the "?" placeholder.
+                 if ($v === '?')
+                     $jsonp = true;
+             }
+             if (! $jsonp) {
+                 $options['data'][$jsonpCallbackParam] = '?';
+             }
+         }
+         $options['dataType'] = 'json';
+     }
+     if (isset($options['dataType']) && $options['dataType'] == 'json') {
+         $jsonpCallback = 'json_'.md5(microtime());
+         $jsonpData = $jsonpUrl = false;
+         if ($options['data']) {
+             foreach($options['data'] as $n => $v) {
+                 if ($v === '?')
+                     $jsonpData = $n;
+             }
+         }
+         if (preg_match($jsre, $options['url']))
+             $jsonpUrl = true;
+         if ($jsonpData !== false || $jsonpUrl) {
+             // remember callback name for httpData()
+             $options['_jsonp'] = $jsonpCallback;
+             if ($jsonpData !== false)
+                 $options['data'][$jsonpData] = $jsonpCallback;
+             if ($jsonpUrl)
+                 $options['url'] = preg_replace($jsre, "=$jsonpCallback\\1", $options['url']);
+         }
+     }
+     $client->setUri($options['url']);
+     $client->setMethod(strtoupper($options['type']));
+     if (isset($options['referer']) && $options['referer'])
+         $client->setHeaders('Referer', $options['referer']);
+     $client->setHeaders(array(
+         'User-Agent' => 'Mozilla/5.0 (X11; U; Linux x86; en-US; rv:1.9.0.5) Gecko'
+             .'/2008122010 Firefox/3.0.5',
+         // TODO custom charset
+         'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+         'Accept-Language' => 'en-us,en;q=0.5',
+     ));
+     if ($options['username'])
+         $client->setAuth($options['username'], $options['password']);
+     if (isset($options['ifModified']) && $options['ifModified'])
+         $client->setHeaders("If-Modified-Since",
+             self::$lastModified
+                 ? self::$lastModified
+                 : "Thu, 01 Jan 1970 00:00:00 GMT"
+         );
+     $client->setHeaders("Accept",
+         isset($options['dataType'])
+         && isset(self::$ajaxSettings['accepts'][ $options['dataType'] ])
+             ? self::$ajaxSettings['accepts'][ $options['dataType'] ].", */*"
+             : self::$ajaxSettings['accepts']['_default']
+     );
+     if (strtolower($options['type']) == 'get') {
+         $client->setParameterGet($options['data']);
+     } else if (strtolower($options['type']) == 'post') {
+         $client->setEncType($options['contentType']);
+         $client->setParameterPost($options['data']);
+     }
+     if (self::$active == 0 && $options['global'])
+         phpQueryEvents::trigger($documentID, 'ajaxStart');
+     self::$active++;
+     // beforeSend callback
+     if (isset($options['beforeSend']) && $options['beforeSend'])
+         phpQuery::callbackRun($options['beforeSend'], array($client));
+     // ajaxSend event
+     if ($options['global'])
+         phpQueryEvents::trigger($documentID, 'ajaxSend', array($client, $options));
+     if (phpQuery::$debug) {
+         self::debug("{$options['type']}: {$options['url']}\n");
+         self::debug("Options: ".var_export($options, true)."\n");
+     }
+     // request
+     $response = $client->request();
+     if (phpQuery::$debug) {
+         self::debug('Status: '.$response->getStatus().' / '.$response->getMessage());
+         self::debug($client->getLastRequest());
+         self::debug($response->getHeaders());
+     }
+     if ($response->isSuccessful()) {
+         // XXX temporary
+         self::$lastModified = $response->getHeader('Last-Modified');
+         $data = self::httpData($response->getBody(), $options['dataType'], $options);
+         if (isset($options['success']) && $options['success'])
+             phpQuery::callbackRun($options['success'], array($data, $response->getStatus(), $options));
+         if ($options['global'])
+             phpQueryEvents::trigger($documentID, 'ajaxSuccess', array($client, $options));
+     } else {
+         if (isset($options['error']) && $options['error'])
+             phpQuery::callbackRun($options['error'], array($client, $response->getStatus(), $response->getMessage()));
+         if ($options['global'])
+             phpQueryEvents::trigger($documentID, 'ajaxError', array($client, /*$response->getStatus(),*/$response->getMessage(), $options));
+     }
+     if (isset($options['complete']) && $options['complete'])
+         phpQuery::callbackRun($options['complete'], array($client, $response->getStatus()));
+     if ($options['global'])
+         phpQueryEvents::trigger($documentID, 'ajaxComplete', array($client, $options));
+     // The counter is incremented for every request above, so it has to be
+     // decremented for every request too; it used to be decremented only
+     // for "global" requests and therefore never got back to zero.
+     $stillActive = --self::$active;
+     if ($options['global'] && ! $stillActive)
+         phpQueryEvents::trigger($documentID, 'ajaxStop');
+     return $client;
+ }
+ protected static function httpData($data, $type, $options) {
+     // Optional user filter runs first and may replace the payload.
+     if (! empty($options['dataFilter']))
+         $data = self::callbackRun($options['dataFilter'], array($data, $type));
+     // Only string payloads of type "json" are decoded.
+     if (! is_string($data) || $type != "json")
+         return $data;
+     // JSONP responses: strip the "callback( ... )" wrapper first.
+     if (! empty($options['_jsonp']))
+         $data = preg_replace('/^\s*\w+\((.*)\)\s*$/s', '$1', $data);
+     return self::parseJSON($data);
+ }
+ /**
+ * Builds a URL-encoded query string from $data.
+ *
+ * @param array|phpQuery $data
+ *
+ */
+ public static function param($data) {
+     // Empty string instead of null for the numeric prefix: same output,
+     // but no "passing null to parameter" deprecation on PHP 8.1+.
+     return http_build_query($data, '', '&');
+ }
+ public static function get($url, $data = null, $callback = null, $type = null) {
+     // Shorthand for a GET request through phpQuery::ajax().
+     // jQuery-style call get($url, $callback): a second argument that is
+     // neither request data (array / phpQueryObject) nor null is the
+     // callback. A null $data no longer overwrites an explicit $callback,
+     // so get($url, null, $callback) keeps its callback.
+     if ($data !== null && ! is_array($data) && ! ($data instanceof phpQueryObject)) {
+         $callback = $data;
+         $data = null;
+     }
+     return phpQuery::ajax(array(
+         'type' => 'GET',
+         'url' => $url,
+         'data' => $data,
+         'success' => $callback,
+         'dataType' => $type,
+     ));
+ }
+ public static function post($url, $data = null, $callback = null, $type = null) {
+     // Shorthand for a POST request through phpQuery::ajax().
+     // jQuery-style call post($url, $callback): a second argument that is
+     // neither request data (array / phpQueryObject) nor null is the
+     // callback. A null $data no longer overwrites an explicit $callback.
+     if ($data !== null && ! is_array($data) && ! ($data instanceof phpQueryObject)) {
+         $callback = $data;
+         $data = null;
+     }
+     return phpQuery::ajax(array(
+         'type' => 'POST',
+         'url' => $url,
+         'data' => $data,
+         'success' => $callback,
+         'dataType' => $type,
+     ));
+ }
+ public static function getJSON($url, $data = null, $callback = null) {
+     // Shorthand for a GET request with dataType "json".
+     // jQuery-style call getJSON($url, $callback): a second argument that
+     // is neither request data (array / phpQueryObject) nor null is the
+     // callback. A null $data no longer overwrites an explicit $callback.
+     if ($data !== null && ! is_array($data) && ! ($data instanceof phpQueryObject)) {
+         $callback = $data;
+         $data = null;
+     }
+     return phpQuery::ajax(array(
+         'type' => 'GET',
+         'url' => $url,
+         'data' => $data,
+         'success' => $callback,
+         'dataType' => 'json',
+     ));
+ }
+ public static function ajaxSetup($options) {
+     // Caller-supplied entries override the current defaults key by key.
+     $merged = array_merge(self::$ajaxSettings, $options);
+     self::$ajaxSettings = $merged;
+ }
+ public static function ajaxAllowHost($host1, $host2 = null, $host3 = null) {
+     // Hosts come either as one array or as separate arguments.
+     $hosts = func_get_args();
+     if (is_array($host1))
+         $hosts = $host1;
+     foreach($hosts as $candidate) {
+         // Skip empty values and hosts that are already allowed.
+         if (! $candidate)
+             continue;
+         if (in_array($candidate, phpQuery::$ajaxAllowedHosts))
+             continue;
+         phpQuery::$ajaxAllowedHosts[] = $candidate;
+     }
+ }
+ public static function ajaxAllowURL($url1, $url2 = null, $url3 = null) {
+     // URLs come either as one array or as separate arguments.
+     $urls = func_get_args();
+     if (is_array($url1))
+         $urls = $url1;
+     // Only the host part of each URL is added to the allow-list.
+     foreach($urls as $address) {
+         $hostname = parse_url($address, PHP_URL_HOST);
+         phpQuery::ajaxAllowHost($hostname);
+     }
+ }
+ /**
+ * Returns JSON representation of $data.
+ *
+ * @static
+ * @param mixed $data
+ * @return string
+ */
+ public static function toJSON($data) {
+     // Fall back to the Zend encoder only when json_encode() is unavailable.
+     if (! function_exists('json_encode')) {
+         require_once('Zend/Json/Encoder.php');
+         return Zend_Json_Encoder::encode($data);
+     }
+     return json_encode($data);
+ }
+ /**
+ * Parses JSON into proper PHP type.
+ *
+ * @static
+ * @param string $json
+ * @return mixed
+ */
+ public static function parseJSON($json) {
+     if (function_exists('json_decode')) {
+         $decoded = json_decode(trim($json), true);
+         // json_decode and UTF8 issues: a null result falls through to
+         // the Zend decoder below.
+         if ($decoded !== null)
+             return $decoded;
+     }
+     require_once('Zend/Json/Decoder.php');
+     return Zend_Json_Decoder::decode($json);
+ }
+ /**
+ * Returns source's document ID.
+ *
+ * @param $source DOMNode|phpQueryObject
+ * @return string
+ */
+ public static function getDocumentID($source) {
+     if ($source instanceof DOMNode) {
+         // A document is compared directly; any other node is resolved
+         // through its owner document.
+         $owner = $source instanceof DOMDocument
+             ? $source
+             : $source->ownerDocument;
+         foreach(phpQuery::$documents as $id => $wrapper) {
+             if ($owner->isSameNode($wrapper->document))
+                 return $id;
+         }
+         return null;
+     }
+     if ($source instanceof phpQueryObject)
+         return $source->getDocumentID();
+     // A string is accepted only when it is a registered document ID.
+     if (is_string($source) && isset(phpQuery::$documents[$source]))
+         return $source;
+     return null;
+ }
+ /**
+ * Get DOMDocument object related to $source.
+ * Returns null if such document doesn't exist.
+ *
+ * @param $source DOMNode|phpQueryObject|string
+ * @return DOMDocument|null
+ */
+ public static function getDOMDocument($source) {
+     // A DOMDocument is returned as is.
+     if ($source instanceof DOMDocument)
+         return $source;
+     // Anything else is resolved to a document ID first. The lookup used
+     // an undefined $id and array syntax; entries of self::$documents are
+     // wrapper objects exposing ->document (as getDocumentID() reads them).
+     $id = self::getDocumentID($source);
+     return $id
+         ? self::$documents[$id]->document
+         : null;
+ }
+
+ // UTILITIES
+ // http://docs.jquery.com/Utilities
+
+ /**
+ *
+ * @return unknown_type
+ * @link http://docs.jquery.com/Utilities/jQuery.makeArray
+ */
+ public static function makeArray($obj) {
+     // The body referred to an undefined $object; it now reads the actual
+     // parameter.
+     // - DOMNodeList: a flat list of its nodes.
+     // - other non-Iterator object: one row holding its accessible properties.
+     // - anything else iterable: one row holding its key => value pairs.
+     $array = array();
+     if (is_object($obj) && $obj instanceof DOMNodeList) {
+         foreach($obj as $value)
+             $array[] = $value;
+     } else if (is_object($obj) && ! ($obj instanceof Iterator)) {
+         foreach(get_object_vars($obj) as $name => $value)
+             $array[0][$name] = $value;
+     } else {
+         foreach($obj as $name => $value)
+             $array[0][$name] = $value;
+     }
+     return $array;
+ }
+ public static function inArray($value, $array) {
+ // Thin wrapper over in_array() (loose comparison); returns a boolean.
+ return in_array($value, $array);
+ }
+ /**
+ *
+ * @param $object
+ * @param $callback
+ * @return unknown_type
+ * @link http://docs.jquery.com/Utilities/jQuery.each
+ */
+ public static function each($object, $callback, $param1 = null, $param2 = null, $param3 = null) {
+     // Arguments after $callback form the parameter structure handed to
+     // callbackRun().
+     $extra = null;
+     if (func_num_args() > 2) {
+         $all = func_get_args();
+         $extra = array_slice($all, 2);
+     }
+     // Plain objects are walked through their properties; arrays and
+     // Iterators are walked directly.
+     $items = (is_object($object) && ! ($object instanceof Iterator))
+         ? get_object_vars($object)
+         : $object;
+     foreach($items as $key => $item)
+         phpQuery::callbackRun($callback, array($key, $item), $extra);
+ }
+ /**
+ *
+ * @link http://docs.jquery.com/Utilities/jQuery.map
+ */
+ public static function map($array, $callback, $param1 = null, $param2 = null, $param3 = null) {
+     // Arguments after $callback form the parameter structure handed to
+     // callbackRun().
+     $extra = null;
+     if (func_num_args() > 2) {
+         $all = func_get_args();
+         $extra = array_slice($all, 2);
+     }
+     $mapped = array();
+     foreach($array as $element) {
+         $out = phpQuery::callbackRun($callback, array($element), $extra);
+         // null results are dropped; array results are flattened one level.
+         if ($out === null)
+             continue;
+         if (! is_array($out)) {
+             $mapped[] = $out;
+             continue;
+         }
+         foreach($out as $piece)
+             $mapped[] = $piece;
+     }
+     return $mapped;
+ }
+ /**
+ *
+ * @param $callback Callback
+ * @param $params
+ * @param $paramStructure
+ * @return unknown_type
+ */
+ public static function callbackRun($callback, $params = array(), $paramStructure = null) {
+     if (! $callback)
+         return;
+     if ($callback instanceof CallbackParameterToReference) {
+         // TODO support ParamStructure to select which $param push to reference
+         // The first parameter is stored on the object instead of calling anything.
+         if (isset($params[0]))
+             $callback->callback = $params[0];
+         return true;
+     }
+     if ($callback instanceof Callback) {
+         // A Callback object carries its own parameter structure.
+         $paramStructure = $callback->params;
+         $callback = $callback->callback;
+     }
+     if (! $paramStructure)
+         return call_user_func_array($callback, $params);
+     // Each CallbackParam placeholder takes the next value from $params;
+     // every other entry is passed through unchanged.
+     $arguments = array();
+     $next = 0;
+     foreach($paramStructure as $slot => $value) {
+         if ($value instanceof CallbackParam) {
+             $arguments[$slot] = $params[$next];
+             $next++;
+         } else {
+             $arguments[$slot] = $value;
+         }
+     }
+     return call_user_func_array($callback, $arguments);
+ }
+ /**
+ * Merge 2 phpQuery objects.
+ * @param array $one
+ * @param array $two
+ * @protected
+ * @todo node lists, phpQueryObject
+ */
+ public static function merge($one, $two) {
+     // Starts from $one's elements and appends each node of $two that is
+     // not already present (compared with isSameNode()). Returns the array.
+     $merged = $one->elements;
+     foreach($two->elements as $candidate) {
+         $duplicate = false;
+         foreach($merged as $known) {
+             if ($known->isSameNode($candidate))
+                 $duplicate = true;
+         }
+         if ($duplicate)
+             continue;
+         $merged[] = $candidate;
+     }
+     return $merged;
+ }
+ /**
+ *
+ * @param $array
+ * @param $callback
+ * @param $invert
+ * @return unknown_type
+ * @link http://docs.jquery.com/Utilities/jQuery.grep
+ */
+ public static function grep($array, $callback, $invert = false) {
+     // An element is kept when the callback result is strictly identical
+     // to this boolean (true normally, false when $invert is set).
+     $wanted = !(bool)$invert;
+     $kept = array();
+     foreach($array as $index => $item) {
+         $verdict = call_user_func_array($callback, array($item, $index));
+         if ($verdict === $wanted)
+             $kept[] = $item;
+     }
+     return $kept;
+ }
+ public static function unique($array) {
+ // Thin wrapper over array_unique().
+ return array_unique($array);
+ }
+ /**
+ *
+ * @param $function
+ * @return unknown_type
+ * @TODO there are problems with non-static methods, second parameter pass it
+ * but doesnt verify is method is really callable
+ */
+ public static function isFunction($function) {
+ // Thin wrapper over is_callable().
+ return is_callable($function);
+ }
+ public static function trim($str) {
+ // Thin wrapper over PHP's global trim().
+ return trim($str);
+ }
+ /* PLUGINS NAMESPACE */
+ /**
+ *
+ * @param $url
+ * @param $callback
+ * @param $param1
+ * @param $param2
+ * @param $param3
+ * @return phpQueryObject
+ */
+ public static function browserGet($url, $callback, $param1 = null, $param2 = null, $param3 = null) {
+     if (! self::plugin('WebBrowser')) {
+         self::debug('WebBrowser plugin not available...');
+         return;
+     }
+     // All received arguments are forwarded to the plugin method.
+     $args = func_get_args();
+     return self::callbackRun(array(self::$plugins, 'browserGet'), $args);
+ }
+ /**
+ *
+ * @param $url
+ * @param $data
+ * @param $callback
+ * @param $param1
+ * @param $param2
+ * @param $param3
+ * @return phpQueryObject
+ */
+ public static function browserPost($url, $data, $callback, $param1 = null, $param2 = null, $param3 = null) {
+     if (! self::plugin('WebBrowser')) {
+         self::debug('WebBrowser plugin not available...');
+         return;
+     }
+     // All received arguments are forwarded to the plugin method.
+     $args = func_get_args();
+     return self::callbackRun(array(self::$plugins, 'browserPost'), $args);
+ }
+ /**
+ *
+ * @param $ajaxSettings
+ * @param $callback
+ * @param $param1
+ * @param $param2
+ * @param $param3
+ * @return phpQueryObject
+ */
+ public static function browser($ajaxSettings, $callback, $param1 = null, $param2 = null, $param3 = null) {
+     if (! self::plugin('WebBrowser')) {
+         self::debug('WebBrowser plugin not available...');
+         return;
+     }
+     // All received arguments are forwarded to the plugin method.
+     $args = func_get_args();
+     return self::callbackRun(array(self::$plugins, 'browser'), $args);
+ }
+ /**
+ *
+ * @param $code
+ * @return string
+ */
+ public static function php($code) {
+ // Shortcut for code() with the type fixed to 'php'.
+ return self::code('php', $code);
+ }
+ /**
+ *
+ * @param $type
+ * @param $code
+ * @return string
+ */
+ public static function code($type, $code) {
+     // Wraps the trimmed $code in an HTML comment inside a <$type> element.
+     // NOTE(review): this copy returned "<$type>$type>" and ignored $code
+     // altogether (the comment wrapper had been stripped); restored here -
+     // confirm against upstream phpQuery.
+     return "<$type><!-- ".trim($code)." --></$type>";
+ }
+
+ public static function __callStatic($method, $params) {
+ // Unknown static calls are forwarded to the object held in
+ // phpQuery::$plugins under the same method name and arguments.
+ return call_user_func_array(
+ array(phpQuery::$plugins, $method),
+ $params
+ );
+ }
+ protected static function dataSetupNode($node, $documentID) {
+     $wrapper = phpQuery::$documents[$documentID];
+     // Return the tracked instance when this node is already known ...
+     foreach($wrapper->dataNodes as $known) {
+         if ($node->isSameNode($known))
+             return $known;
+     }
+     // ... otherwise start tracking it.
+     $wrapper->dataNodes[] = $node;
+     return $node;
+ }
+ protected static function dataRemoveNode($node, $documentID) {
+     $wrapper = phpQuery::$documents[$documentID];
+     // Drop every tracked entry for this node together with its stored data.
+     foreach($wrapper->dataNodes as $index => $known) {
+         if (! $node->isSameNode($known))
+             continue;
+         unset($wrapper->dataNodes[$index]);
+         unset($wrapper->data[ $known->dataID ]);
+     }
+ }
+ public static function data($node, $name, $data, $documentID = null) {
+     // TODO check if this works
+     $documentID = $documentID ? $documentID : self::getDocumentID($node);
+     $wrapper = phpQuery::$documents[$documentID];
+     $tracked = self::dataSetupNode($node, $documentID);
+     // First use of this node: give it the document's next data ID.
+     if (! isset($tracked->dataID)) {
+         $tracked->dataID = ++$wrapper->uuid;
+     }
+     $id = $tracked->dataID;
+     if (! isset($wrapper->data[$id]))
+         $wrapper->data[$id] = array();
+     // A non-null $data stores the value under $name.
+     if ($data !== null)
+         $wrapper->data[$id][$name] = $data;
+     // Without a name the node's data ID is returned.
+     if (! $name)
+         return $id;
+     // With a name: the stored value, or null when nothing is stored.
+     return isset($wrapper->data[$id][$name])
+         ? $wrapper->data[$id][$name]
+         : null;
+ }
+ public static function removeData($node, $name, $documentID = null) {
+     // Removes the value stored under $name for $node, or everything
+     // stored for the node when $name is empty. $documentID is optional,
+     // as in data(), and is derived from the node when omitted.
+     if (! $documentID)
+         // TODO check if this works
+         $documentID = self::getDocumentID($node);
+     $document = phpQuery::$documents[$documentID];
+     $node = self::dataSetupNode($node, $documentID);
+     $id = $node->dataID;
+     if ($name) {
+         // unset() on a missing key is a no-op, so no isset() guard is needed.
+         unset($document->data[$id][$name]);
+         // Stop tracking the node once nothing is stored for it. The
+         // previous check looked at the first remaining *value*, so a
+         // stored 0, '' or false wiped the node's other data as well.
+         if (empty($document->data[$id]))
+             self::dataRemoveNode($node, $documentID);
+     } else {
+         self::dataRemoveNode($node, $documentID);
+     }
+ }
+}
+/**
+ * Plugins static namespace class.
+ *
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ * @todo move plugin methods here (as statics)
+ */
+class phpQueryPlugins {
+ /**
+  * Dispatches an unknown method call to a callback registered in
+  * phpQuery::$extendStaticMethods or, failing that, to the static method
+  * of the plugin class recorded in phpQuery::$pluginsStaticMethods.
+  *
+  * @param string $method
+  * @param array $args
+  * @return mixed Callback result; for plugin methods that return null,
+  *               this object instead.
+  * @throws Exception When no handler is registered for $method.
+  */
+ public function __call($method, $args) {
+     if (isset(phpQuery::$extendStaticMethods[$method])) {
+         // The result used to be computed and then dropped, so extended
+         // methods always appeared to return null.
+         return call_user_func_array(
+             phpQuery::$extendStaticMethods[$method],
+             $args
+         );
+     }
+     if (isset(phpQuery::$pluginsStaticMethods[$method])) {
+         $class = phpQuery::$pluginsStaticMethods[$method];
+         $realClass = "phpQueryPlugin_$class";
+         $return = call_user_func_array(
+             array($realClass, $method),
+             $args
+         );
+         return isset($return)
+             ? $return
+             : $this;
+     }
+     throw new Exception("Method '{$method}' doesnt exist");
+ }
+}
+/**
+ * Shortcut to phpQuery::pq($arg1, $context)
+ * Chainable.
+ *
+ * @see phpQuery::pq()
+ * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery
+ * @author Tobiasz Cudnik
+ * @package phpQuery
+ */
+function pq($arg1, $context = null) {
+ // Forward exactly the arguments received to the static phpQuery::pq().
+ $arguments = func_get_args();
+ return call_user_func_array('phpQuery::pq', $arguments);
+}
+// Append the phpQuery/ and phpQuery/plugins/ directories next to this file
+// to the include path (plugins dir and Zend framework).
+set_include_path(
+ get_include_path()
+ .PATH_SEPARATOR.dirname(__FILE__).'/phpQuery/'
+ .PATH_SEPARATOR.dirname(__FILE__).'/phpQuery/plugins/'
+);
+// Why an object? No __call nor __get for statics in PHP...
+// XXX __callStatic will be available in PHP 5.3
+phpQuery::$plugins = new phpQueryPlugins();
+// Include the optional bootstrap file (personal library config) if present.
+if (file_exists(dirname(__FILE__).'/phpQuery/bootstrap.php'))
+ require_once dirname(__FILE__).'/phpQuery/bootstrap.php';
diff --git a/vendor/owner888/phpspider/library/rolling_curl.php b/vendor/owner888/phpspider/library/rolling_curl.php
new file mode 100644
index 0000000..fd8b81f
--- /dev/null
+++ b/vendor/owner888/phpspider/library/rolling_curl.php
@@ -0,0 +1,466 @@
+<?php
+/**
+ * @copyright seatle
+ * @link http://www.epooll.com/
+ * @license http://www.opensource.org/licenses/mit-license.php MIT License
+ */
+
+class rolling_curl
+{
+    /**
+     * @var int
+     *
+     * Maximum number of requests in flight at the same time.
+     * Example: with 8 queued requests and a window of 5, the first 5 start
+     * immediately and each remaining request starts as soon as one finishes.
+     * Author's note: when crawling Zhihu, 5 was fairly stable while 7 or more
+     * started to time out; multi-process crawling did not show the problem
+     * because the processes rarely hit the site concurrently.
+     */
+    public $window_size = 5;
+
+    /**
+     * @var float
+     *
+     * Timeout is the timeout used for curl_multi_select.
+     */
+    private $timeout = 10;
+
+    /**
+     * @var string|array
+     *
+     * Callback applied to every finished request; it is invoked as
+     * callback($output, $info, $request, $error).
+     */
+    public $callback;
+
+    /**
+     * @var array
+     *
+     * Default cURL options applied to every request.
+     */
+    protected $options = array(
+        // NOTE(review): peer certificate verification is disabled for all requests.
+        CURLOPT_SSL_VERIFYPEER => 0,
+        CURLOPT_RETURNTRANSFER => 1,
+        // TIMEOUT = connect time + transfer time, so TIMEOUT must be larger than
+        // CONNECTTIMEOUT, otherwise setting CONNECTTIMEOUT is pointless.
+        // "Connection timed out after 30001 milliseconds"
+        CURLOPT_CONNECTTIMEOUT => 30,
+        CURLOPT_TIMEOUT => 60,
+        CURLOPT_HEADER => 0,
+        // Author's note: when timeouts are used in a multi-threaded scenario this
+        // makes libcurl ignore signal handlers; a small chance of a crash remains.
+        CURLOPT_NOSIGNAL => 1,
+        CURLOPT_USERAGENT => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36",
+    );
+
+    /**
+     * @var array
+     *
+     * Default request headers as name => value pairs.
+     */
+    private $headers = array();
+
+    /**
+     * @var array[]
+     *
+     * Request queue; each entry has the keys url, method, fields, headers, options.
+     */
+    private $requests = array();
+
+    /**
+     * @var array
+     *
+     * Maps handles to request indexes
+     */
+    private $requestMap = array();
+
+    public function __construct()
+    {
+    }
+
+    /**
+     * Set the per-request transfer timeout (CURLOPT_TIMEOUT).
+     * This does not change the curl_multi_select timeout.
+     *
+     * @param int $timeout
+     * @return void
+     */
+    public function set_timeout($timeout)
+    {
+        $this->options[CURLOPT_TIMEOUT] = $timeout;
+    }
+
+    /**
+     * Set the proxy (CURLOPT_PROXY).
+     *
+     * @param string $proxy
+     * @return void
+     */
+    public function set_proxy($proxy)
+    {
+        $this->options[CURLOPT_PROXY] = $proxy;
+    }
+
+    /**
+     * Set the referer (CURLOPT_REFERER).
+     *
+     * @param string $referer
+     * @return void
+     */
+    public function set_referer($referer)
+    {
+        $this->options[CURLOPT_REFERER] = $referer;
+    }
+
+    /**
+     * Set the user agent (CURLOPT_USERAGENT).
+     *
+     * @param string $useragent
+     * @return void
+     */
+    public function set_useragent($useragent)
+    {
+        $this->options[CURLOPT_USERAGENT] = $useragent;
+    }
+
+    /**
+     * Set the cookie string (CURLOPT_COOKIE).
+     *
+     * @param string $cookie
+     * @return void
+     */
+    public function set_cookie($cookie)
+    {
+        $this->options[CURLOPT_COOKIE] = $cookie;
+    }
+
+    /**
+     * Set the cookie jar file (CURLOPT_COOKIEJAR).
+     *
+     * @param string $cookiejar
+     * @return void
+     */
+    public function set_cookiejar($cookiejar)
+    {
+        $this->options[CURLOPT_COOKIEJAR] = $cookiejar;
+    }
+
+    /**
+     * Set the cookie file (CURLOPT_COOKIEFILE).
+     *
+     * @param string $cookiefile
+     * @return void
+     */
+    public function set_cookiefile($cookiefile)
+    {
+        $this->options[CURLOPT_COOKIEFILE] = $cookiefile;
+    }
+
+    /**
+     * Choose whether response headers are returned together with the body
+     * (CURLOPT_HEADER).
+     *
+     * @param mixed $http_raw
+     * @return void
+     * @author seatle
+     * @created time :2016-09-18 10:17
+     */
+    public function set_http_raw($http_raw = false)
+    {
+        $this->options[CURLOPT_HEADER] = $http_raw;
+    }
+
+    /**
+     * Add CLIENT-IP and X-FORWARDED-FOR headers carrying the given IP.
+     * Headers that are already set keep their current value.
+     *
+     * @param string $ip
+     * @return void
+     */
+    public function set_ip($ip)
+    {
+        $headers = array(
+            'CLIENT-IP'=>$ip,
+            'X-FORWARDED-FOR'=>$ip,
+        );
+        $this->headers = $this->headers + $headers;
+    }
+
+    /**
+     * Add default headers. Headers that are already set keep their current
+     * value (array union: the first value wins).
+     *
+     * @param array $headers name => value pairs
+     * @return void
+     */
+    public function set_headers($headers)
+    {
+        $this->headers = $this->headers + $headers;
+    }
+
+    /**
+     * Add a Host header. An already set Host header keeps its current value.
+     *
+     * @param string $hosts
+     * @return void
+     */
+    public function set_hosts($hosts)
+    {
+        $headers = array(
+            'Host'=>$hosts,
+        );
+        $this->headers = $this->headers + $headers;
+    }
+
+    /**
+     * Enable gzip response encoding (CURLOPT_ENCODING) when $gzip is truthy.
+     * A falsy value leaves the current setting untouched.
+     *
+     * @param mixed $gzip
+     * @return void
+     */
+    public function set_gzip($gzip)
+    {
+        if ($gzip)
+        {
+            $this->options[CURLOPT_ENCODING] = 'gzip';
+        }
+    }
+
+    /**
+     * Queue a request; nothing is sent until execute() is called.
+     *
+     * @param string       $url
+     * @param string       $method  "GET" or "POST" (case-insensitive)
+     * @param array|string $fields  query fields (GET) or body (POST)
+     * @param array        $headers name => value pairs, override the defaults
+     * @param array        $options cURL options, override the defaults
+     * @return bool always true
+     */
+    public function request($url, $method = "GET", $fields = array(), $headers = array(), $options = array())
+    {
+        $this->requests[] = array('url'=>$url,'method'=>$method,'fields'=>$fields,'headers'=>$headers,'options'=>$options);
+        return true;
+    }
+
+    /**
+     * Build the complete cURL option array for one queued request.
+     *
+     * @param array $request entry created by request()
+     * @return array options suitable for curl_setopt_array()
+     */
+    public function get_options($request)
+    {
+        $options = $this->options;
+        $headers = $this->headers;
+        $url = $request['url'];
+        $method = strtolower($request['method']);
+
+        if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode'))
+        {
+            $options[CURLOPT_FOLLOWLOCATION] = 1;
+            $options[CURLOPT_MAXREDIRS] = 5;
+        }
+
+        // GET: append the fields to the URL as a query string. The result is
+        // what gets sent; previously it was built and then discarded.
+        if ($method == 'get' && !empty($request['fields']))
+        {
+            $query = is_string($request['fields'])
+                ? $request['fields']
+                : http_build_query($request['fields']);
+            // Use "&" when the URL already carries a query string.
+            $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
+        }
+        // POST: send the fields as the request body.
+        if ($method == 'post')
+        {
+            $options[CURLOPT_POST] = 1;
+            $options[CURLOPT_POSTFIELDS] = $request['fields'];
+        }
+
+        // append custom options for this specific request
+        if ($request['options'])
+        {
+            $options = $request['options'] + $options;
+        }
+
+        if ($request['headers'])
+        {
+            $headers = $request['headers'] + $headers;
+        }
+
+        // cURL expects headers as a list of "Name:value" strings.
+        $header_lines = array();
+        foreach ($headers as $k=>$v)
+        {
+            $header_lines[] = $k.":".$v;
+        }
+
+        $options[CURLOPT_URL] = $url;
+        $options[CURLOPT_HTTPHEADER] = $header_lines;
+
+        return $options;
+    }
+
+    /**
+     * Queue a GET request.
+     *
+     * @param string       $url
+     * @param array|string $fields
+     * @param array        $headers
+     * @param array        $options
+     * @return bool
+     */
+    public function get($url, $fields = array(), $headers = array(), $options = array())
+    {
+        return $this->request($url, 'get', $fields, $headers, $options);
+    }
+
+    /**
+     * Queue a POST request.
+     *
+     * $fields may take three forms: 1. array; 2. http query string; 3. json
+     * 1. array('name'=>'yangzetao') 2. http_build_query(array('name'=>'yangzetao')) 3. json_encode(array('name'=>'yangzetao'))
+     * The first two are ordinary posts that the receiver can read from $_POST.
+     * The third is a post stream (json rpc, i.e. a web service): although it is
+     * sent with POST, the receiver can only read it as a raw stream
+     * (php://input or $HTTP_RAW_POST_DATA).
+     *
+     * @param string       $url
+     * @param array|string $fields
+     * @param array        $headers
+     * @param array        $options
+     * @return bool
+     */
+    public function post($url, $fields = array(), $headers = array(), $options = array())
+    {
+        return $this->request($url, 'post', $fields, $headers, $options);
+    }
+
+    /**
+     * Execute processing
+     *
+     * @param int $window_size Max number of simultaneous connections
+     * @return string|bool false when the queue is empty; for a single request
+     *                     without callback the response body (false on cURL
+     *                     failure); true otherwise
+     */
+    public function execute($window_size = null)
+    {
+        $count = count($this->requests);
+        if ($count == 0)
+        {
+            return false;
+        }
+        // Exactly one request: no need for the multi interface.
+        elseif ($count == 1)
+        {
+            return $this->single_curl();
+        }
+        else
+        {
+            // Start rolling curl; window_size is the max number of simultaneous connections.
+            return $this->rolling_curl($window_size);
+        }
+    }
+
+    /**
+     * Run the single queued request with a plain cURL handle.
+     *
+     * @return string|bool response body when no callback is set, true otherwise
+     */
+    private function single_curl()
+    {
+        $ch = curl_init();
+        // Take the request off the queue.
+        $request = array_shift($this->requests);
+        $options = $this->get_options($request);
+        curl_setopt_array($ch, $options);
+        $output = curl_exec($ch);
+        $info = curl_getinfo($ch);
+        $error = null;
+        if ($output === false)
+        {
+            $error = curl_error( $ch );
+        }
+        $this->close_handle($ch);
+
+        // A callback is not strictly needed for one request, but it is
+        // supported so single and multi requests behave the same way.
+        if ($this->callback)
+        {
+            if (is_callable($this->callback))
+            {
+                call_user_func($this->callback, $output, $info, $request, $error);
+            }
+        }
+        else
+        {
+            return $output;
+        }
+        return true;
+    }
+
+    /**
+     * Run all queued requests through curl_multi, keeping at most
+     * window_size transfers in flight.
+     *
+     * @param int $window_size optional new value for $this->window_size
+     * @return bool always true
+     */
+    private function rolling_curl($window_size = null)
+    {
+        // Remember an explicitly requested window size.
+        if ($window_size)
+            $this->window_size = $window_size;
+
+        // Effective window for this run: never more than the number of queued
+        // requests and never less than 1. Kept local so a small batch does not
+        // permanently shrink $this->window_size.
+        $window = max(1, min((int) $this->window_size, count($this->requests)));
+
+        $master = curl_multi_init();
+        $this->requestMap = array();
+        // Handles currently attached to $master, keyed like requestMap.
+        $handles = array();
+        // Index of the next queued request to start.
+        $next = 0;
+
+        // Start the first window of requests.
+        for (; $next < $window; $next++)
+        {
+            $ch = $this->start_request($master, $next);
+            $handles[$this->handle_key($ch)] = $ch;
+        }
+
+        do {
+            while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
+
+            // Stop on a multi-handle error.
+            if ($execrun != CURLM_OK) { break; }
+
+            // Collect every transfer that has finished.
+            while ($done = curl_multi_info_read($master))
+            {
+                $finished = $done['handle'];
+
+                // Fetch info, content and error from the finished transfer.
+                $info = curl_getinfo($finished);
+                $output = curl_multi_getcontent($finished);
+                $error = curl_error($finished);
+
+                $key = $this->handle_key($finished);
+                $request = $this->requests[$this->requestMap[$key]];
+                unset($this->requestMap[$key]);
+
+                // Hand the result to the callback when one is bound.
+                $callback = $this->callback;
+                if (is_callable($callback))
+                {
+                    call_user_func($callback, $output, $info, $request, $error);
+                }
+
+                // One transfer finished, so start the next queued request to
+                // keep the window full. The queue is re-checked each time, so
+                // requests queued by the callback are picked up as well.
+                if (isset($this->requests[$next]))
+                {
+                    $ch = $this->start_request($master, $next);
+                    $handles[$this->handle_key($ch)] = $ch;
+                    $next++;
+                }
+
+                // Detach and release the finished handle.
+                curl_multi_remove_handle($master, $finished);
+                unset($handles[$key]);
+                $this->close_handle($finished);
+            }
+
+            // Block while transfers are pending instead of spinning the CPU.
+            if ($running)
+            {
+                curl_multi_select($master, $this->timeout);
+            }
+
+            // $running was reported before replacement handles were added, so
+            // also continue while any handle is still attached.
+        } while ($running || count($handles) > 0);
+
+        // Release handles left over after an early break.
+        foreach ($handles as $ch)
+        {
+            curl_multi_remove_handle($master, $ch);
+            $this->close_handle($ch);
+        }
+        curl_multi_close($master);
+
+        // Empty the queue so a later execute() on the same instance does not
+        // run these URLs again. The properties stay defined as empty arrays.
+        $this->requests = array();
+        $this->requestMap = array();
+        return true;
+    }
+
+    /**
+     * Create a handle for queued request $index, attach it to the multi handle
+     * and record it in requestMap.
+     *
+     * @param mixed $master curl multi handle
+     * @param int   $index  index into $this->requests
+     * @return mixed the new easy handle
+     */
+    private function start_request($master, $index)
+    {
+        $ch = curl_init();
+        curl_setopt_array($ch, $this->get_options($this->requests[$index]));
+        curl_multi_add_handle($master, $ch);
+        $this->requestMap[$this->handle_key($ch)] = $index;
+        return $ch;
+    }
+
+    /**
+     * Array key identifying a cURL handle. Handles are resources before
+     * PHP 8 and objects (which cannot be cast to string) from PHP 8 on.
+     *
+     * @param mixed $ch
+     * @return string
+     */
+    private function handle_key($ch)
+    {
+        return is_object($ch) ? spl_object_hash($ch) : (string) $ch;
+    }
+
+    /**
+     * Close a cURL easy handle when it is a resource (PHP < 8). Object
+     * handles are released automatically once unreferenced.
+     *
+     * @param mixed $ch
+     * @return void
+     */
+    private function close_handle($ch)
+    {
+        if (is_resource($ch))
+        {
+            curl_close($ch);
+        }
+    }
+
+    /**
+     * @return void
+     */
+    public function __destruct()
+    {
+        unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
+    }
+}
diff --git a/vendor/owner888/phpspider/test.php b/vendor/owner888/phpspider/test.php
new file mode 100644
index 0000000..79e7d24
--- /dev/null
+++ b/vendor/owner888/phpspider/test.php
@@ -0,0 +1,7 @@
+addServer('10.10.10.238');
+// Register the "reverse" job name and map it to reverse_fn().
+// NOTE(review): $gmworker is presumably a GearmanWorker created above this
+// excerpt - confirm against the full file.
+$gmworker->addFunction("reverse", "reverse_fn");
+
+print "Waiting for job...\n";
+// Keep processing jobs until work() returns a falsy value.
+while($gmworker->work())
+{
+    // Report the return code and stop on anything other than success.
+    if ($gmworker->returnCode() != GEARMAN_SUCCESS)
+    {
+        echo "return_code: " . $gmworker->returnCode() . "\n";
+        break;
+    }
+    //break;
+}
+
+/**
+ * Job handler: waits 3 seconds, prints the workload and returns it reversed.
+ *
+ * @param mixed $job job object exposing workload()
+ * @return string the workload reversed with strrev()
+ */
+function reverse_fn($job)
+{
+    sleep(3);
+    echo $job->workload()."\n";
+    return strrev($job->workload());
+}
+
+
+// Printed only after the work loop above has ended.
+echo "hello\n";
+?>
+
+
+
diff --git a/wiki/Home.md b/wiki/Home.md
new file mode 100644
index 0000000..2c41468
--- /dev/null
+++ b/wiki/Home.md
@@ -0,0 +1,21 @@
+## 功能
+IYUU自动辅种工具,目前能对国内大部分的PT站点自动辅种;支持下载器集群,支持多盘位,支持多下载目录,支持远程连接等。
+
+## 原理
+IYUU自动辅种工具(英文名:iyuuAutoReseed),是一款PHP语言编写的Private Tracker辅种脚本,通过计划任务或常驻内存,按指定频率调用transmission、qBittorrent下载软件的API接口,提取正在做种的info_hash提交到服务器API接口,根据API接口返回的数据拼接种子链接,提交给下载器,自动辅种各个站点。
+
+## 运行环境
+所有具备PHP运行环境的平台!
+例如:Linux、Windows、MacOS
+
+## 需求提交/错误反馈
+ - 点击链接加入群聊【IYUU自动辅种交流】:[https://jq.qq.com/?_wv=1027&k=5JOfOlM][1]
+
+## 捐助开发者
+如果觉得我的付出,节约了您的宝贵时间,请随意打赏一杯咖啡!或者一杯水!
+您所有的打赏将用于服务器续期,增加服务的延续性。
+![微信打赏.png][2]
+
+
+[1]: https://jq.qq.com/?_wv=1027&k=5JOfOlM
+[2]: https://www.iyuu.cn/usr/uploads/2019/12/801558607.png
\ No newline at end of file
diff --git a/wiki/合作站点鉴权配置.md b/wiki/合作站点鉴权配置.md
new file mode 100644
index 0000000..4360b81
--- /dev/null
+++ b/wiki/合作站点鉴权配置.md
@@ -0,0 +1,27 @@
+## 重点讲解Ourbits站点的鉴权配置
+博客链接:https://www.iyuu.cn/archives/337/
+IYUU自动辅种工具、Ourbits双方达成合作,可以对使用接口的用户,实现认证。
+### 申请爱语飞飞微信通知token,新用户访问:http://iyuu.cn 申请!
+1.点击`开始使用`,出现二维码,用`微信扫码`
+![微信通知1.png][1]
+![微信通知2.png][2]
+![微信通知3.png][3]
+2.复制您的token令牌到`/app/config/config.php`文件内的`iyuu.cn`对应的配置字段,保存。如图:
+![微信通知4.png][4]
+
+### 设置Ourbits:
+![编辑配置4.png][5]
+`passkey`,在你的控制面板 - 密钥
+`is_vip`,根据你的实际情况填写,因站点有下载种子的流控,如果你不在限制之列,可以`设置为1`
+`id`,为用户中心打开后,浏览器地址栏**http://xxxxx.xxx/userdetails.php?id=`46880`**等号=后面的几个数字,如图:
+![编辑配置6.png][6]
+
+到此,配置文件编辑完毕,请记得保存。
+如果提示保存格式,请保存为UTF8(无BOM)格式。
+
+ [1]: https://www.iyuu.cn/usr/uploads/2019/12/2331433923.png
+ [2]: https://www.iyuu.cn/usr/uploads/2019/12/3324442680.png
+ [3]: https://www.iyuu.cn/usr/uploads/2019/12/3181272964.png
+ [4]: https://www.iyuu.cn/usr/uploads/2019/12/3669828008.png
+ [5]: https://www.iyuu.cn/usr/uploads/2019/12/3696916642.png
+ [6]: https://www.iyuu.cn/usr/uploads/2019/12/1230288911.png
\ No newline at end of file
diff --git a/wiki/命令汇总.md b/wiki/命令汇总.md
new file mode 100644
index 0000000..dd99eb6
--- /dev/null
+++ b/wiki/命令汇总.md
@@ -0,0 +1,10 @@
+## 【特别提示】
+php命令与脚本路径之间是有个空格,请注意!请注意!请注意!
+
+
+## IYUU自动辅种命令:
+`php ./iyuu.cn.php`
+
+
+
+### 【重要说明:实际路径,以你实际的为准,切勿生搬硬套!】
\ No newline at end of file
diff --git a/wiki/常见问题.md b/wiki/常见问题.md
new file mode 100644
index 0000000..1e00071
--- /dev/null
+++ b/wiki/常见问题.md
@@ -0,0 +1,83 @@
+## 常见问题FAQ
+
+#### 问:这款脚本会不会泄露我的秘钥、cookie、客户端连接密码?
+
+答:绝对不会!!代码全开源,能经受审查!所有私密配置只在本地存储使用,绝不会发送给任何第三方。
+
+#### 问:只使用IYUU自动辅种,需要配置各站的cookie吗?
+
+答:只需配置全局客户端和各网站的passkey密钥(没有配置passkey的站点,在辅种时候会跳过)。2019年12月28日补充:辅种hdcity、hdchina需要配置cookie。
+
+#### 问:IYUU自动辅种工具,向服务器发送了什么实现自动辅种呢?
+
+答:1.文件`phpspider\app\torrent\cache\hashString.txt`是脚本发送给服务器的数据,是按下载器分组的种子info_hash;2.文件`phpspider\app\torrent\cache\reseed.txt`是服务器返回的可辅种数据。
+
+#### 问:本次添加成功的辅种任务,下次辅种时还会重复添加吗?
+
+答:添加成功的辅种任务,会在本地生成缓存记录,避免重复添加辅种任务,路径在:`phpspider\app\torrent\cachehash`。
+
+#### 问:为什么有些站点自动跳过?
+
+答:因为站点在下载种子时有流控或者人机验证,会导致辅种失败;但脚本会在`phpspider\app\torrent\cache`目录下生成以站点命名的手动辅种文本。
+
+#### 问:我拥有辅种时自动跳过站点的特殊权限,如何设置为可以辅种呢?
+
+答:在站点的独立配置区域,添加一行代码`'is_vip' => 1,`即可。例如Ourbits:
+
+```php
+// ourbits
+'ourbits' => array(
+ // 如果需要用下载免费种脚本,须配置(只是自动辅种,可以不配置此项)
+ 'cookie' => '',
+ // 如果需要自动辅种,必须配置
+ 'passkey' => '',
+ 'id' => 46880, // 用户ID
+ 'is_vip' => 1, // 是否具有VIP或特殊权限?0 普通,1 VIP
+),
+```
+
+
+
+#### 问:如何升级到最新版本?
+
+答:从github或码云仓库,下载最新的源码,覆盖到本地即可。
+
+#### 问:为啥我编辑配置后,运行的时候显示乱码?
+
+答:保存的编码格式不对,正确的格式为UTF8(无BOM);推荐编辑器:`VS code`、`EditPlus`、`SublimeText`、`Notepad++`。
+
+#### 问:为什么用IYUU自动辅种,有些种子无法校验通过?
+
+答:首先,这个属于正常现象。 只要IYUU自动辅种匹配过来,然后校验通不过的,分为以下几种情况:
+
+1、被改了文件名,重新做种
+
+2、被改了顶层目录名,重新做种
+
+3、把单文件放进了目录里面,重新做种
+
+4、更改了部分文件,例如nfo文件,重新做种
+
+一般情况下,通过分析种子结构,创建软链接,90%以上都可以辅种成功。
+也可以不管他或删除校验失败的任务(不要删除数据)。
+
+#### 问:如何创建软链接、硬链接手动辅种?
+
+答:Windows命令: `mklink`, Linux命令: `ln -s`, 更详细的用法请百度。
+
+#### 问:IYUU自动辅种,添加计划任务后多久运行一次比较合适?
+
+答:为减轻服务器压力,推荐间隔3小时以上(太频繁的调用接口,可能被封禁)。
+
+#### 问:猫站的Tracker为啥是http,而不是https?
+
+答:请退出登录,在登录时勾选下面两个SSL的选项,登录后复制cookie,重新配置。
+
+#### 问:如何反馈问题?
+
+答:1、点击链接加入群聊【IYUU自动辅种交流】:[https://jq.qq.com/?_wv=1027&k=5JOfOlM](https://jq.qq.com/?_wv=1027&k=5JOfOlM)
+
+2、QQ群:859882209
+
+3、issues: https://gitee.com/ledc/IYUUAutoReseed/issues
+
diff --git a/wiki/开发计划.md b/wiki/开发计划.md
new file mode 100644
index 0000000..08d45c1
--- /dev/null
+++ b/wiki/开发计划.md
@@ -0,0 +1,15 @@
+## 开发计划
+| 功能 | 开发状态 | 预计开发时间 | 开发完成时间 |
+| - | :-: | ---- | ---- |
+| 微信鉴权 | 已完成 | 2019年12月22日 | 2019年12月23日 |
+| 流控站点,手动辅种 | 已完成 | 2019年12月24日 | 2019年12月24日 |
+| m-team IPv4、IPv6自定义配置 | 已完成 | 2019年12月25日 | 2019年12月25日 |
+| 未配置客户端智能过滤 | 已完成 | 2019年12月25日 | 2019年12月25日 |
+| 自动辅种结束微信通知 | 已完成 | 2019年12月25日 | 2019年12月27日 |
+| 做种客户端间转移 | 已完成 | 2019年12月25日 | 2020年1月14日 |
+| 手动辅种按目录分组 | 已完成 | 2019年12月26日 | 2020年1月14日 |
+| WEB页面生成配置 | 暂未开始 | | |
+| 自动转移客户端 | 暂未开始 | | |
+| 脚本docker容器化 | 暂未开始 | | |
+| 浏览器插件 | 暂未开始 | | |
+| 合集自动拆包辅种 | 暂未开始 | | |
diff --git a/wiki/更新历史.md b/wiki/更新历史.md
new file mode 100644
index 0000000..b1c6895
--- /dev/null
+++ b/wiki/更新历史.md
@@ -0,0 +1,109 @@
+### 2020年1月14日
+
+更新hdbug域名,删除下载免费种冗余文件。
+
+### 2020年1月10日
+
+修复:qBittorrent打开自动管理时,自动辅种目录对应错误的问题。
+
+### 2020年1月9日
+
+优化:萌猫tracker的IP类型改为可配置;
+优化:自动辅种时添加的任务,校验后自动暂停(无需更改全局)。
+
+### 2020年1月5日
+
+修复:城市cuhash变化无法辅种的问题
+
+### 2020年1月1日
+
+新增:scg
+
+修复:
+
+1.转移客户端做种支持磁力链
+
+2.萌猫抓取问题
+
+3.瓷器抓取标题的问题
+
+### 2019年12月27日
+
+新增功能:自动辅种结束,微信通知统计信息,优化城市适配,新增discfan(GZT)。
+
+### 2019年12月25日
+
+1.新增支持upxin(HDU)、oshen
+
+------
+
+### 2019年12月25日
+1.馒头支持ipv4、ipv6选择
+2.未配置的全局客户端智能过滤,不会再影响自动辅种
+
+------
+
+### 2019年12月24日
+新增hdstreet、joyhd、u2
+
+------
+
+### 2019年12月23日
+鉴权模式上线试运行
+
+------
+
+### 2019年12月21日
+
+新增兽站、opencd、hdbug;
+
+------
+
+### 2019年12月20日
+
+新增1ptba、hdtime
+
+------
+
+### 2019年12月17日
+
+新增站点瓷器;
+
+------
+
+### 2019年12月16日
+
+新增leaguehd、聆音;
+
+------
+
+### 2019年12月15日
+
+1.自动辅种20个站;
+2.支持qBittorrent做种转transmission
+3.新增qBittorrent自动辅种时的状态过滤,只辅种已完成的种子
+
+------
+
+### 2019年12月12日
+
+目前支持17个站点的自动辅种;
+目前支持18个站点下载免费种;
+
+------
+
+### 2019年12月10日
+
+自动辅种工具完成!
+
+------
+
+### 2019年11月19日
+
+我堡、天空 完美适配,支持大小、做种数、下载数筛选。
+
+------
+
+技术讨论及后续更新,请加入QQ群!
+**群名称:IYUU自动辅种交流**
+**QQ群号:859882209**
\ No newline at end of file
diff --git a/wiki/自动辅种最简配置(windows篇).md b/wiki/自动辅种最简配置(windows篇).md
new file mode 100644
index 0000000..6dda3bd
--- /dev/null
+++ b/wiki/自动辅种最简配置(windows篇).md
@@ -0,0 +1,102 @@
+以下教程以windows为基础进行讲解,其他系统同理。
+博客链接:https://www.iyuu.cn/archives/324/
+## 第一步 下载压缩包
+从[码云仓库][1],下载最新源码,解压缩到D盘的根目录下。
+
+## 第二步 复制一份配置文件
+打开`D:\IYUUAutoReseed\app\config`目录,复制一份`config.sample.php`,另存为`config.php`。
+
+这样操作后,需要升级新版本时,直接覆盖即可,不会影响到配置。
+
+## 第三步 编辑配置文件
+提醒:千万不要用windows记事本来编辑配置文件(会导致乱码)!!
+推荐编辑软件:`VS code`、`EditPlus`、`SublimeText`、`Notepad++`等(保存格式,选UTF8 无BOM);
+配置文件内容较多,新手往往很迷茫,不知道改哪里,在这里我重点强调2个步骤:
+`1.编辑全局客户端; 2.编辑各站的秘钥,即passkey。`
+
+其他配置,如果不懂也没有关系;先保持默认,等脚本运行起来,再修改也不迟。另外,修改时一定要细心,仔细看教程。
+打开`D:\IYUUAutoReseed\app\config\config.php`文件,如下图:
+![编辑配置1.png][2]
+
+### 填写全局客户端
+上图红框内的是`transmission`的示例配置,绿框是`qBittorrent`的示例配置;
+IYUU自动辅种工具,目前支持这两种下载器,支持多盘位,辅种时全自动对应资源的下载目录。
+1,编辑`transmission`下载器
+`http://127.0.0.1:9091/transmission/rpc`是下载器的连接参数,你要修改的部分是`127.0.0.1:9091`改成你的IP与端口(本机使用无需修改),局域网内的机器请填写局域网IP与端口;远程使用请填写DDNS的远程连接域名与端口。
+username是用户名、password是密码。
+如果你没有用到`transmission`下载器,请把红框的内容都删除。
+
+2,编辑`qBittorrent`下载器
+方法与上一步相同,只需填写ip、端口、用户名、密码即可。如果您是windows下的qBittorrent,请参考下图打开`WEB用户界面`:
+![qb设置WEB用户界面.png][3]
+
+因为我两个下载器都在用,编辑好后,如图:
+![编辑配置2.png][4]
+
+### 填写各站秘钥passkey
+IYUU自动辅种:需要您配置各站的passkey(没有配置passkey的站点会自动跳过)。
+从各站点的控制面板,找到您的`秘钥`复制粘贴过来即可。
+配置好后如图:
+![编辑配置3.png][5]
+
+----------
+
+
+## 第四步,重点讲解Ourbits站点的配置
+IYUU自动辅种工具、Ourbits双方达成合作,可以对使用接口的用户,实现认证。
+### 申请爱语飞飞微信通知token,新用户访问:http://iyuu.cn 申请!
+1.点击`开始使用`,出现二维码,用`微信扫码`
+![微信通知1.png][6]
+![微信通知2.png][7]
+![微信通知3.png][8]
+2.复制您的token令牌到`/app/config/config.php`文件内的`iyuu.cn`对应的配置字段,保存。如图:
+![微信通知4.png][9]
+
+### 设置Ourbits:
+![编辑配置4.png][10]
+`passkey`,在你的控制面板 - 密钥
+`is_vip`,根据你的实际情况填写,因站点有下载种子的流控,如果你不在限制之列,可以`设置为1`
+`id`,为用户中心打开后,浏览器地址栏**http://xxxxx.xxx/userdetails.php?id=`46880`**等号=后面的几个数字,如图:
+![编辑配置6.png][11]
+
+到此,配置文件编辑完毕,请记得保存。
+如果提示保存格式,请保存为UTF8(无BOM)格式。
+
+------
+
+## 群晖、铁威马、威联通等Linux环境
+
+经过上面步骤,其实已经完成了配置,只需要把脚本复制到设备内,用php命令运行脚本即可。
+
+群晖php命令:`php`
+
+威联通php命令:`/mnt/ext/opt/apache/bin/php`
+
+铁威马php命令:`php`
+
+----------
+
+## Windows安装PHP运行环境
+也可以去官方下载【https://www.php.net/downloads】,官方下载的记得开启`curl、fileinfo、mbstring`,这3个扩展。
+另外我打包了一份,下载地址:
+微云链接:https://share.weiyun.com/5EiXLfn 密码:ezsvnb
+下载回来是一个ZIP压缩包,解压到`D:\IYUUAutoReseed\`目录内,文件结构如图:
+![编辑配置7.png][12]
+点击红框内`执行辅种`即可。
+如果你前期严格按照配置一步步操作,这里会正常显示跑动的辅种列表。正常如图:
+![编辑配置8.png][13]
+
+
+[1]: https://gitee.com/ledc/IYUUAutoReseed
+[2]: https://www.iyuu.cn/usr/uploads/2019/12/2720183833.png
+[3]: https://www.iyuu.cn/usr/uploads/2019/12/405587689.png
+[4]: https://www.iyuu.cn/usr/uploads/2019/12/441257656.png
+[5]: https://www.iyuu.cn/usr/uploads/2019/12/890327305.png
+[6]: https://www.iyuu.cn/usr/uploads/2019/12/2331433923.png
+[7]: https://www.iyuu.cn/usr/uploads/2019/12/3324442680.png
+[8]: https://www.iyuu.cn/usr/uploads/2019/12/3181272964.png
+[9]: https://www.iyuu.cn/usr/uploads/2019/12/3669828008.png
+[10]: https://www.iyuu.cn/usr/uploads/2019/12/3696916642.png
+[11]: https://www.iyuu.cn/usr/uploads/2019/12/1230288911.png
+[12]: https://www.iyuu.cn/usr/uploads/2019/12/3189986236.png
+[13]: https://www.iyuu.cn/usr/uploads/2019/12/2523845772.png
\ No newline at end of file
diff --git a/微信赞赏码.png b/微信赞赏码.png
new file mode 100644
index 0000000000000000000000000000000000000000..6afafcf368c90a782a666e257e96fb1fc71f76ff
GIT binary patch
literal 56180
zcmcG#1yo$k5-yqqcY-sxCAbeV1h?SsI=H)QkbwkE@Idh3HaNjO!3QU3AVGq=%N_Fn
z=fCH?b>4gT-t``94YOzO?ylE`pg(!?1WWFnF?sWeeLCVtI
z+|Aa-!`9h}`f)_CnX{*dFfE+Zzoy{m^3SkN?thC3E-(%sunPwlJLlt+{xq~O|EHac
zr<=nc;uhu{mJXJVmQEh-@U~q4v~{s@_HcH$asIy<`k%-DK>#jVC8d98{8wHa9seQW
z?jh|3pT^$;`LC_rHGN$yIn*uPoju*mEv3ESY|=f>#swtlW(oFicGGlrcKFvqss4*)
zYDvk*g%PA?)UkE4aQ1d*`nNuoQeY2DVcN&%Y+T%I+}xU+JRtbmaq+Tn3V=8{|7xn_
zY+-BV`(K+1fVjB-qbXc#EWjS%|JTMA<{&F)H%Bmh&bE$VYfBCnCu>^jeCl-JGp#9pDe#)nz59<)kI~xCHt5*tprb{;;d01d?-d_W(PYTgpiZ
z)57J$Zfk1+vJ^1m2t!utMgJsOs-e_uJ+QvcBy5ZL_D5`<~Z
zA63B8g7)v*w*SWx_-j~i8%y}0{||ccS2K5KD-Un5o29rlTw4Dts>cE6pX1SN|C$QN
z|8T-_eF`xM56BNMUVtx3TBFp4EDh
z<$mwK*S6;Tgz{a=a4Z?W(_)kBO;3YtFhUO}W)Eku&wWbSF8l(s=hjuuO5B1XAX|-Mm6T_tVxUa$5Ql34Nd6}Ci6F0s^8@>
zZL49CD>QI!}lhOH>FB|;62Z;pt}ud=QHO^K7=H40%y5BqGiW@n>juW-6NPz$jZo$Fm0~kY
z4A7$%1o(=-9AVgl8^T|j@ZV_a!u0-94W!KfPzvyWaQ1Jl9{K-IM&a7@|9jW}Mu5oU
zp9%eIhK~~em#+U$uKt(|*lWnp0<7jSvgp&IeHZX5RQ_qV>cd(Oc__QV{<
ztY;!JF(e-v+Y%D+)YaAH>jS;K_#SXEdqPJ!6=h_QD~(#tn
zYsdDHXu#zYPtUW~pO1((CAn2=J*It$d9lCrlPcveqJ0rqWCXn~hd;VZ&Mps@+aw|A
zgvW6QtE+0;fu96iX`TJCu&`PKZ&HGFA0?31LGTr7F}E_?{n
zm&={=Pp8b;;{^hbcE8t}HGX(tXR>Z>&3Q>rA26}m?tAj0yQk-B&GHYhy+A=1t*xz1
z?CtC9{E-+@8LR^G+D&^IH(98}GR{X2C2)JO5SQ>XsR2RCcTqveup{Ws4~ffpzV7$S
zzzqyy_8j=~O^C?E0{V!#;6?>ED=}SN-H-i}8hV<80q1aat8%*^7#PrPaQLXXTfhr5
zi8F9$UfJnC7NRf##1_#hF5Yo
z&HfkcNy*745%~jQZJ%$yV524>bpa)$A}pA|l2B`INnUuf6#Mu~dRmKN;60i}gh58*3UqKJC>O1`|k4
z_z;wvbqj9ci70x%xMf-NGKBrTN~H7sFZQ#^XVy12QyYyc^y}pV6x~$wb2s~ZHQXgnO0;OM+jo15#+Ohn6(pi!vEG%aGoZW!cGL6M2)C~JJI
z>8jK~(f2hq91Z`)>uyB}rDyu&uCA^LiGWNUIS`qqfOLBrI^;ST8AJ(5SJQ5pkjR!5
z(SNf!MEb`tn7AyY2Qjz>o0IwSKUP&$b&E7OE@4qzPB{%)
zZgS1
zgi{{V$;Ks-*OY3NMKEiXzOE1tL$+CHa142Nc70|)RbjxdqRQxKYs}M;J6Q)#)
zvv<771#xGs%_5fd`53I%Y4&RUIhW6oLLBeGgo`np!$-s0lo=d%o6TUyJXHw49p7*{B&
zaenfyEI6KVzt}Fj0fb?K9$UC81aJom19T!=sF2D|CH$i#g`Z@o~2A!~LD5pYoJi0uENvm7r#&
zp`P6_IG{!*<<(!t6)Xnlz35P^vAX(0c*LMn!YtoNfrdoN?MaWt__&L|UbXpGY{(tI
z{Y?3sqt!_I*#MJ82cqUuxkiz48pzY`9i>##p7Q;zXg#Wexa!A+;JY0Y-4{sKS>9A=mLw!4R|$M$QVAL~y*ykE2>W~B5Eu;B
z8p$u%lU&IDC7^h|$@S;p?x%0L$|42;hWK21ZfUOI``cEyn99rFXRq)9fozlheoM`s
z=w`}aeh=ZHphUI?roNCHDOAcJ;Qr|f;rm-4Vrwt`eNd2*gX(VA`l7eys_9bLjjqSJ
zudbX73>LVBs|4LQ7~dMjQwm2dMaajbcn)8{_rsIX)tiG3jv4?tMMmrNLZdUs6$Jqj
zByt?L$zkOWUe(oELIRfj()q`zK8R8913YuaWUqd%(|m1D2inUoOiZD#qyDuGZjf2e
zLBSC0s)<{scj92*zJcb{WKp&quoE8Pab;@N7v-ZP6Ec8RQIp&8c{Z21%!g-eqNSXE
zr(1Xi_fs}?`K({u*5yB}>$G_8X#Z+$i;h-UK8CBwQEc}gr(p#|zXo{+>xsl6l(JrK
z^+g|IYAaTRot>Qx-7eaYJSdECB$`uU1N}lxrNY6%f$zhA)QOIUBb^{JiweDsb|}%ULIaQ3_gC9bIr8YR@DJQD
zweqXaa*0tL@#-1{p0?PT-Lt=<;{vejsC@04i+9}^l-^7B%pN~eTHcWs&$@~Q025dr
z;CMpV5)9xjQgk6ueyu^#=ejwX**fRWESDjQKu*|8Lo+)wlM?jx7IsrJCIR+HG;D
zfS;Cr1hPN{n1_cC-ntY9nMdE+9Jaq}Z!C#QX
znifKaC-7}~)F8DcYVUh((YHHMpTj2r0Kj*FfLGRFPDyqKdiy2WqWE4xOgW&&J?AR8
z#zqtcPvrYOd?(e}*i@K#g~q!MD~eN-%c18^q7(Bnh8rP^7a8E5V`NdJuVJW4Wo`5k
zTQy`<=QQsZ7n6J#SXs+E&*X+*>#omczw$knm6rH+Om
z_3L`Bc_JgB&OmH${8}Y^pD-#`9r9-VmeDRy)M%ECJuooQ>Z=BJ_b~n?32XEB9r_($xO(Q=@zwXg|Za8(IEs?bRI9UD++4G}aLg31^P3+VRDO
zPTI*mK=MIzLNmx>&0Q8rKB~1U%z+@{(~gaN1b`ujX7kmRXR)G_TrQ$u9Zpoj4|?7|
zw7KYsBqK1@d|s!r=v035o3;3$VYIs@?=ZOGT0(SHB#{|@D{UMF;%9!LKG98cjBN*p
zNDfsCWNh+c7*{TCYp8(VX|cucVC76V(l)=dL7^7~I(Pb^$}to|4V|TSNI>SP$MX*y
zC;8vBPU2%d)sR;k{3TO83#s7fz)HTYYiM9U1Xsa57QOUrAt|M2a+%Mr0uPp28o!N6
zDeGu!Ypcr^Fm9H?Rfc7^F$a8j%e9XLu^O=MW|5n2T+2DFPl}BXiXQuoNhk}RKKSOJ
zS#$qk)9-#I8RXy9)uk>i{HS2!*7p{@{r&RaS8a@wqN+c=@~u&rz
zm1%du1j?@^P@#WJG%2E%6>}3yq$(Ue&=C718f;wpRr%v`yVOz2gc#lPB*w^^n-As|
zetv!-J8C2VPezLl7HJ7pyxdS~lvnaNhADD^f6mk*lll)txmP;rMK8-9+=Vb|#^gV)
zFdj53#$