From 7a41449855cdc306ae96ab4a7d6f2d734062184f Mon Sep 17 00:00:00 2001 From: Rhilip Date: Wed, 15 Jan 2020 21:41:28 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BB=8Egit=E4=B8=AD=E7=A7=BB=E9=99=A4wiki?= =?UTF-8?q?=E5=92=8Cvendor=E7=9B=AE=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- vendor/autoload.php | 7 - vendor/composer/ClassLoader.php | 445 -- vendor/composer/LICENSE | 21 - vendor/composer/autoload_classmap.php | 17 - vendor/composer/autoload_files.php | 10 - vendor/composer/autoload_namespaces.php | 10 - vendor/composer/autoload_psr4.php | 10 - vendor/composer/autoload_real.php | 70 - vendor/composer/autoload_static.php | 58 - vendor/composer/installed.json | 119 - vendor/curl/curl/.gitignore | 11 - vendor/curl/curl/.gitlab-ci.yml | 113 - vendor/curl/curl/LICENSE | 20 - vendor/curl/curl/README.md | 125 - vendor/curl/curl/composer.json | 36 - vendor/curl/curl/phpunit.xml.dist | 24 - vendor/curl/curl/src/Curl/Curl.php | 719 --- vendor/curl/curl/tests/CurlTest.php | 277 - .../data/response_headers_with_continue.txt | 13 - vendor/curl/curl/tests/data/test.png | Bin 2855 -> 0 bytes vendor/curl/curl/tests/server/Dockerfile | 9 - .../tests/server/php-curl-test/deploy.php | 37 - .../server/php-curl-test/http_basic_auth.php | 14 - .../php-curl-test/post_file_path_upload.php | 21 - .../php-curl-test/post_multidimensional.php | 4 - .../tests/server/php-curl-test/server.php | 31 - vendor/owner888/phpspider/README.md | 52 - vendor/owner888/phpspider/autoloader.php | 77 - vendor/owner888/phpspider/composer.json | 38 - vendor/owner888/phpspider/core/cache.php | 64 - vendor/owner888/phpspider/core/constants.php | 55 - vendor/owner888/phpspider/core/db.php | 579 -- vendor/owner888/phpspider/core/init.php | 101 - vendor/owner888/phpspider/core/log.php | 119 - .../phpspider/core/phpspider.bak20170807.php | 2870 --------- vendor/owner888/phpspider/core/phpspider.php | 3598 ----------- vendor/owner888/phpspider/core/queue.php | 1388 ---- vendor/owner888/phpspider/core/requests.php | 998 --- vendor/owner888/phpspider/core/selector.php | 588 -- vendor/owner888/phpspider/core/util.php | 936 --- vendor/owner888/phpspider/core/worker.php | 421 -- vendor/owner888/phpspider/gitadd.sh | 20 - .../phpspider/hacked-emails/banners.txt | 129 - .../phpspider/hacked-emails/hacked_emails.php | 49 - .../owner888/phpspider/library/cls_curl.php | 425 -- .../owner888/phpspider/library/cls_query.php | 248 - .../owner888/phpspider/library/cls_redis.php | 1263 ---- .../phpspider/library/cls_redis_client.php | 121 - .../phpspider/library/cls_redis_server.php | 179 - .../owner888/phpspider/library/phpquery.php | 5727 ----------------- .../phpspider/library/rolling_curl.php | 466 -- vendor/owner888/phpspider/test.php | 7 - vendor/owner888/phpspider/worker.php | 32 - wiki/Home.md | 21 - wiki/合作站点鉴权配置.md | 27 - wiki/命令汇总.md | 10 - wiki/常见问题.md | 83 - wiki/开发计划.md | 15 - wiki/数据同步.md | 5 - wiki/更新历史.md | 109 - ...动辅种最简配置(windows篇).md | 102 - 62 files changed, 2 insertions(+), 23144 deletions(-) delete mode 100644 vendor/autoload.php delete mode 100644 vendor/composer/ClassLoader.php delete mode 100644 vendor/composer/LICENSE delete mode 100644 vendor/composer/autoload_classmap.php delete mode 100644 vendor/composer/autoload_files.php delete mode 100644 vendor/composer/autoload_namespaces.php delete mode 100644 vendor/composer/autoload_psr4.php delete mode 100644 vendor/composer/autoload_real.php delete mode 100644 vendor/composer/autoload_static.php delete mode 100644 vendor/composer/installed.json delete mode 100644 vendor/curl/curl/.gitignore delete mode 100644 vendor/curl/curl/.gitlab-ci.yml delete mode 100644 vendor/curl/curl/LICENSE delete mode 100644 vendor/curl/curl/README.md delete mode 100644 vendor/curl/curl/composer.json delete mode 100644 vendor/curl/curl/phpunit.xml.dist delete mode 100644 vendor/curl/curl/src/Curl/Curl.php delete mode 100644 vendor/curl/curl/tests/CurlTest.php delete mode 100644 vendor/curl/curl/tests/data/response_headers_with_continue.txt delete mode 100644 vendor/curl/curl/tests/data/test.png delete mode 100644 vendor/curl/curl/tests/server/Dockerfile delete mode 100644 vendor/curl/curl/tests/server/php-curl-test/deploy.php delete mode 100644 vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php delete mode 100644 vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php delete mode 100644 vendor/curl/curl/tests/server/php-curl-test/post_multidimensional.php delete mode 100644 vendor/curl/curl/tests/server/php-curl-test/server.php delete mode 100644 vendor/owner888/phpspider/README.md delete mode 100644 vendor/owner888/phpspider/autoloader.php delete mode 100644 vendor/owner888/phpspider/composer.json delete mode 100644 vendor/owner888/phpspider/core/cache.php delete mode 100644 vendor/owner888/phpspider/core/constants.php delete mode 100644 vendor/owner888/phpspider/core/db.php delete mode 100644 vendor/owner888/phpspider/core/init.php delete mode 100644 vendor/owner888/phpspider/core/log.php delete mode 100644 vendor/owner888/phpspider/core/phpspider.bak20170807.php delete mode 100644 vendor/owner888/phpspider/core/phpspider.php delete mode 100644 vendor/owner888/phpspider/core/queue.php delete mode 100644 vendor/owner888/phpspider/core/requests.php delete mode 100644 vendor/owner888/phpspider/core/selector.php delete mode 100644 vendor/owner888/phpspider/core/util.php delete mode 100644 vendor/owner888/phpspider/core/worker.php delete mode 100644 vendor/owner888/phpspider/gitadd.sh delete mode 100644 vendor/owner888/phpspider/hacked-emails/banners.txt delete mode 100644 vendor/owner888/phpspider/hacked-emails/hacked_emails.php delete mode 100644 vendor/owner888/phpspider/library/cls_curl.php delete mode 100644 vendor/owner888/phpspider/library/cls_query.php delete mode 100644 vendor/owner888/phpspider/library/cls_redis.php delete mode 100644 vendor/owner888/phpspider/library/cls_redis_client.php delete mode 100644 vendor/owner888/phpspider/library/cls_redis_server.php delete mode 100644 vendor/owner888/phpspider/library/phpquery.php delete mode 100644 vendor/owner888/phpspider/library/rolling_curl.php delete mode 100644 vendor/owner888/phpspider/test.php delete mode 100644 vendor/owner888/phpspider/worker.php delete mode 100644 wiki/Home.md delete mode 100644 wiki/合作站点鉴权配置.md delete mode 100644 wiki/命令汇总.md delete mode 100644 wiki/常见问题.md delete mode 100644 wiki/开发计划.md delete mode 100644 wiki/数据同步.md delete mode 100644 wiki/更新历史.md delete mode 100644 wiki/自动辅种最简配置(windows篇).md diff --git a/.gitignore b/.gitignore index 2e09816..8ddbddc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ /app/config/config.php /php-7.2.12-nts /*.bat -/*.sh \ No newline at end of file +/*.sh +/vendor \ No newline at end of file diff --git a/vendor/autoload.php b/vendor/autoload.php deleted file mode 100644 index a21b818..0000000 --- a/vendor/autoload.php +++ /dev/null @@ -1,7 +0,0 @@ - - * Jordi Boggiano - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ - -namespace Composer\Autoload; - -/** - * ClassLoader implements a PSR-0, PSR-4 and classmap class loader. - * - * $loader = new \Composer\Autoload\ClassLoader(); - * - * // register classes with namespaces - * $loader->add('Symfony\Component', __DIR__.'/component'); - * $loader->add('Symfony', __DIR__.'/framework'); - * - * // activate the autoloader - * $loader->register(); - * - * // to enable searching the include path (eg. for PEAR packages) - * $loader->setUseIncludePath(true); - * - * In this example, if you try to use a class in the Symfony\Component - * namespace or one of its children (Symfony\Component\Console for instance), - * the autoloader will first look for the class under the component/ - * directory, and it will then fallback to the framework/ directory if not - * found before giving up. - * - * This class is loosely based on the Symfony UniversalClassLoader. - * - * @author Fabien Potencier - * @author Jordi Boggiano - * @see http://www.php-fig.org/psr/psr-0/ - * @see http://www.php-fig.org/psr/psr-4/ - */ -class ClassLoader -{ - // PSR-4 - private $prefixLengthsPsr4 = array(); - private $prefixDirsPsr4 = array(); - private $fallbackDirsPsr4 = array(); - - // PSR-0 - private $prefixesPsr0 = array(); - private $fallbackDirsPsr0 = array(); - - private $useIncludePath = false; - private $classMap = array(); - private $classMapAuthoritative = false; - private $missingClasses = array(); - private $apcuPrefix; - - public function getPrefixes() - { - if (!empty($this->prefixesPsr0)) { - return call_user_func_array('array_merge', $this->prefixesPsr0); - } - - return array(); - } - - public function getPrefixesPsr4() - { - return $this->prefixDirsPsr4; - } - - public function getFallbackDirs() - { - return $this->fallbackDirsPsr0; - } - - public function getFallbackDirsPsr4() - { - return $this->fallbackDirsPsr4; - } - - public function getClassMap() - { - return $this->classMap; - } - - /** - * @param array $classMap Class to filename map - */ - public function addClassMap(array $classMap) - { - if ($this->classMap) { - $this->classMap = array_merge($this->classMap, $classMap); - } else { - $this->classMap = $classMap; - } - } - - /** - * Registers a set of PSR-0 directories for a given prefix, either - * appending or prepending to the ones previously set for this prefix. - * - * @param string $prefix The prefix - * @param array|string $paths The PSR-0 root directories - * @param bool $prepend Whether to prepend the directories - */ - public function add($prefix, $paths, $prepend = false) - { - if (!$prefix) { - if ($prepend) { - $this->fallbackDirsPsr0 = array_merge( - (array) $paths, - $this->fallbackDirsPsr0 - ); - } else { - $this->fallbackDirsPsr0 = array_merge( - $this->fallbackDirsPsr0, - (array) $paths - ); - } - - return; - } - - $first = $prefix[0]; - if (!isset($this->prefixesPsr0[$first][$prefix])) { - $this->prefixesPsr0[$first][$prefix] = (array) $paths; - - return; - } - if ($prepend) { - $this->prefixesPsr0[$first][$prefix] = array_merge( - (array) $paths, - $this->prefixesPsr0[$first][$prefix] - ); - } else { - $this->prefixesPsr0[$first][$prefix] = array_merge( - $this->prefixesPsr0[$first][$prefix], - (array) $paths - ); - } - } - - /** - * Registers a set of PSR-4 directories for a given namespace, either - * appending or prepending to the ones previously set for this namespace. - * - * @param string $prefix The prefix/namespace, with trailing '\\' - * @param array|string $paths The PSR-4 base directories - * @param bool $prepend Whether to prepend the directories - * - * @throws \InvalidArgumentException - */ - public function addPsr4($prefix, $paths, $prepend = false) - { - if (!$prefix) { - // Register directories for the root namespace. - if ($prepend) { - $this->fallbackDirsPsr4 = array_merge( - (array) $paths, - $this->fallbackDirsPsr4 - ); - } else { - $this->fallbackDirsPsr4 = array_merge( - $this->fallbackDirsPsr4, - (array) $paths - ); - } - } elseif (!isset($this->prefixDirsPsr4[$prefix])) { - // Register directories for a new namespace. - $length = strlen($prefix); - if ('\\' !== $prefix[$length - 1]) { - throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator."); - } - $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length; - $this->prefixDirsPsr4[$prefix] = (array) $paths; - } elseif ($prepend) { - // Prepend directories for an already registered namespace. - $this->prefixDirsPsr4[$prefix] = array_merge( - (array) $paths, - $this->prefixDirsPsr4[$prefix] - ); - } else { - // Append directories for an already registered namespace. - $this->prefixDirsPsr4[$prefix] = array_merge( - $this->prefixDirsPsr4[$prefix], - (array) $paths - ); - } - } - - /** - * Registers a set of PSR-0 directories for a given prefix, - * replacing any others previously set for this prefix. - * - * @param string $prefix The prefix - * @param array|string $paths The PSR-0 base directories - */ - public function set($prefix, $paths) - { - if (!$prefix) { - $this->fallbackDirsPsr0 = (array) $paths; - } else { - $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths; - } - } - - /** - * Registers a set of PSR-4 directories for a given namespace, - * replacing any others previously set for this namespace. - * - * @param string $prefix The prefix/namespace, with trailing '\\' - * @param array|string $paths The PSR-4 base directories - * - * @throws \InvalidArgumentException - */ - public function setPsr4($prefix, $paths) - { - if (!$prefix) { - $this->fallbackDirsPsr4 = (array) $paths; - } else { - $length = strlen($prefix); - if ('\\' !== $prefix[$length - 1]) { - throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator."); - } - $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length; - $this->prefixDirsPsr4[$prefix] = (array) $paths; - } - } - - /** - * Turns on searching the include path for class files. - * - * @param bool $useIncludePath - */ - public function setUseIncludePath($useIncludePath) - { - $this->useIncludePath = $useIncludePath; - } - - /** - * Can be used to check if the autoloader uses the include path to check - * for classes. - * - * @return bool - */ - public function getUseIncludePath() - { - return $this->useIncludePath; - } - - /** - * Turns off searching the prefix and fallback directories for classes - * that have not been registered with the class map. - * - * @param bool $classMapAuthoritative - */ - public function setClassMapAuthoritative($classMapAuthoritative) - { - $this->classMapAuthoritative = $classMapAuthoritative; - } - - /** - * Should class lookup fail if not found in the current class map? - * - * @return bool - */ - public function isClassMapAuthoritative() - { - return $this->classMapAuthoritative; - } - - /** - * APCu prefix to use to cache found/not-found classes, if the extension is enabled. - * - * @param string|null $apcuPrefix - */ - public function setApcuPrefix($apcuPrefix) - { - $this->apcuPrefix = function_exists('apcu_fetch') && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN) ? $apcuPrefix : null; - } - - /** - * The APCu prefix in use, or null if APCu caching is not enabled. - * - * @return string|null - */ - public function getApcuPrefix() - { - return $this->apcuPrefix; - } - - /** - * Registers this instance as an autoloader. - * - * @param bool $prepend Whether to prepend the autoloader or not - */ - public function register($prepend = false) - { - spl_autoload_register(array($this, 'loadClass'), true, $prepend); - } - - /** - * Unregisters this instance as an autoloader. - */ - public function unregister() - { - spl_autoload_unregister(array($this, 'loadClass')); - } - - /** - * Loads the given class or interface. - * - * @param string $class The name of the class - * @return bool|null True if loaded, null otherwise - */ - public function loadClass($class) - { - if ($file = $this->findFile($class)) { - includeFile($file); - - return true; - } - } - - /** - * Finds the path to the file where the class is defined. - * - * @param string $class The name of the class - * - * @return string|false The path if found, false otherwise - */ - public function findFile($class) - { - // class map lookup - if (isset($this->classMap[$class])) { - return $this->classMap[$class]; - } - if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) { - return false; - } - if (null !== $this->apcuPrefix) { - $file = apcu_fetch($this->apcuPrefix.$class, $hit); - if ($hit) { - return $file; - } - } - - $file = $this->findFileWithExtension($class, '.php'); - - // Search for Hack files if we are running on HHVM - if (false === $file && defined('HHVM_VERSION')) { - $file = $this->findFileWithExtension($class, '.hh'); - } - - if (null !== $this->apcuPrefix) { - apcu_add($this->apcuPrefix.$class, $file); - } - - if (false === $file) { - // Remember that this class does not exist. - $this->missingClasses[$class] = true; - } - - return $file; - } - - private function findFileWithExtension($class, $ext) - { - // PSR-4 lookup - $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext; - - $first = $class[0]; - if (isset($this->prefixLengthsPsr4[$first])) { - $subPath = $class; - while (false !== $lastPos = strrpos($subPath, '\\')) { - $subPath = substr($subPath, 0, $lastPos); - $search = $subPath . '\\'; - if (isset($this->prefixDirsPsr4[$search])) { - $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1); - foreach ($this->prefixDirsPsr4[$search] as $dir) { - if (file_exists($file = $dir . $pathEnd)) { - return $file; - } - } - } - } - } - - // PSR-4 fallback dirs - foreach ($this->fallbackDirsPsr4 as $dir) { - if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) { - return $file; - } - } - - // PSR-0 lookup - if (false !== $pos = strrpos($class, '\\')) { - // namespaced class name - $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1) - . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR); - } else { - // PEAR-like class name - $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext; - } - - if (isset($this->prefixesPsr0[$first])) { - foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) { - if (0 === strpos($class, $prefix)) { - foreach ($dirs as $dir) { - if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) { - return $file; - } - } - } - } - } - - // PSR-0 fallback dirs - foreach ($this->fallbackDirsPsr0 as $dir) { - if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) { - return $file; - } - } - - // PSR-0 include paths. - if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) { - return $file; - } - - return false; - } -} - -/** - * Scope isolated include. - * - * Prevents access to $this/self from included files. - */ -function includeFile($file) -{ - include $file; -} diff --git a/vendor/composer/LICENSE b/vendor/composer/LICENSE deleted file mode 100644 index f27399a..0000000 --- a/vendor/composer/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ - -Copyright (c) Nils Adermann, Jordi Boggiano - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is furnished -to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - diff --git a/vendor/composer/autoload_classmap.php b/vendor/composer/autoload_classmap.php deleted file mode 100644 index be254e2..0000000 --- a/vendor/composer/autoload_classmap.php +++ /dev/null @@ -1,17 +0,0 @@ - $baseDir . '/app/Class/Bencode.php', - 'IFile' => $baseDir . '/app/Class/IFile.php', - 'Oauth' => $baseDir . '/app/Class/Oauth.php', - 'Rpc' => $baseDir . '/app/Class/Rpc.php', - 'TransmissionRPC' => $baseDir . '/app/Class/TransmissionRPC.php', - 'TransmissionRPCException' => $baseDir . '/app/Class/TransmissionRPC.php', - 'qBittorrent' => $baseDir . '/app/Class/qBittorrent.php', - 'uTorrent' => $baseDir . '/app/Class/uTorrent.php', -); diff --git a/vendor/composer/autoload_files.php b/vendor/composer/autoload_files.php deleted file mode 100644 index ca97d90..0000000 --- a/vendor/composer/autoload_files.php +++ /dev/null @@ -1,10 +0,0 @@ - $baseDir . '/app/Class/Function.php', -); diff --git a/vendor/composer/autoload_namespaces.php b/vendor/composer/autoload_namespaces.php deleted file mode 100644 index c018f68..0000000 --- a/vendor/composer/autoload_namespaces.php +++ /dev/null @@ -1,10 +0,0 @@ - array($vendorDir . '/curl/curl/src'), -); diff --git a/vendor/composer/autoload_psr4.php b/vendor/composer/autoload_psr4.php deleted file mode 100644 index 909fcbe..0000000 --- a/vendor/composer/autoload_psr4.php +++ /dev/null @@ -1,10 +0,0 @@ - array($vendorDir . '/owner888/phpspider'), -); diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php deleted file mode 100644 index bc69ac4..0000000 --- a/vendor/composer/autoload_real.php +++ /dev/null @@ -1,70 +0,0 @@ -= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded()); - if ($useStaticLoader) { - require_once __DIR__ . '/autoload_static.php'; - - call_user_func(\Composer\Autoload\ComposerStaticInit902220bdd481fe56c25750cdf0255dd6::getInitializer($loader)); - } else { - $map = require __DIR__ . '/autoload_namespaces.php'; - foreach ($map as $namespace => $path) { - $loader->set($namespace, $path); - } - - $map = require __DIR__ . '/autoload_psr4.php'; - foreach ($map as $namespace => $path) { - $loader->setPsr4($namespace, $path); - } - - $classMap = require __DIR__ . '/autoload_classmap.php'; - if ($classMap) { - $loader->addClassMap($classMap); - } - } - - $loader->register(true); - - if ($useStaticLoader) { - $includeFiles = Composer\Autoload\ComposerStaticInit902220bdd481fe56c25750cdf0255dd6::$files; - } else { - $includeFiles = require __DIR__ . '/autoload_files.php'; - } - foreach ($includeFiles as $fileIdentifier => $file) { - composerRequire902220bdd481fe56c25750cdf0255dd6($fileIdentifier, $file); - } - - return $loader; - } -} - -function composerRequire902220bdd481fe56c25750cdf0255dd6($fileIdentifier, $file) -{ - if (empty($GLOBALS['__composer_autoload_files'][$fileIdentifier])) { - require $file; - - $GLOBALS['__composer_autoload_files'][$fileIdentifier] = true; - } -} diff --git a/vendor/composer/autoload_static.php b/vendor/composer/autoload_static.php deleted file mode 100644 index a0319f0..0000000 --- a/vendor/composer/autoload_static.php +++ /dev/null @@ -1,58 +0,0 @@ - __DIR__ . '/../..' . '/app/Class/Function.php', - ); - - public static $prefixLengthsPsr4 = array ( - 'p' => - array ( - 'phpspider\\' => 10, - ), - ); - - public static $prefixDirsPsr4 = array ( - 'phpspider\\' => - array ( - 0 => __DIR__ . '/..' . '/owner888/phpspider', - ), - ); - - public static $prefixesPsr0 = array ( - 'C' => - array ( - 'Curl' => - array ( - 0 => __DIR__ . '/..' . '/curl/curl/src', - ), - ), - ); - - public static $classMap = array ( - 'Bencode' => __DIR__ . '/../..' . '/app/Class/Bencode.php', - 'IFile' => __DIR__ . '/../..' . '/app/Class/IFile.php', - 'Oauth' => __DIR__ . '/../..' . '/app/Class/Oauth.php', - 'Rpc' => __DIR__ . '/../..' . '/app/Class/Rpc.php', - 'TransmissionRPC' => __DIR__ . '/../..' . '/app/Class/TransmissionRPC.php', - 'TransmissionRPCException' => __DIR__ . '/../..' . '/app/Class/TransmissionRPC.php', - 'qBittorrent' => __DIR__ . '/../..' . '/app/Class/qBittorrent.php', - 'uTorrent' => __DIR__ . '/../..' . '/app/Class/uTorrent.php', - ); - - public static function getInitializer(ClassLoader $loader) - { - return \Closure::bind(function () use ($loader) { - $loader->prefixLengthsPsr4 = ComposerStaticInit902220bdd481fe56c25750cdf0255dd6::$prefixLengthsPsr4; - $loader->prefixDirsPsr4 = ComposerStaticInit902220bdd481fe56c25750cdf0255dd6::$prefixDirsPsr4; - $loader->prefixesPsr0 = ComposerStaticInit902220bdd481fe56c25750cdf0255dd6::$prefixesPsr0; - $loader->classMap = ComposerStaticInit902220bdd481fe56c25750cdf0255dd6::$classMap; - - }, null, ClassLoader::class); - } -} diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json deleted file mode 100644 index 02e3e39..0000000 --- a/vendor/composer/installed.json +++ /dev/null @@ -1,119 +0,0 @@ -[ - { - "name": "curl/curl", - "version": "2.2.0", - "version_normalized": "2.2.0.0", - "source": { - "type": "git", - "url": "https://github.com/php-mod/curl.git", - "reference": "d22086dd2eee5ca02e4c29b9a5bdf3645bfdbbff" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/php-mod/curl/zipball/d22086dd2eee5ca02e4c29b9a5bdf3645bfdbbff", - "reference": "d22086dd2eee5ca02e4c29b9a5bdf3645bfdbbff", - "shasum": "", - "mirrors": [ - { - "url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%", - "preferred": true - } - ] - }, - "require": { - "ext-curl": "*", - "php": "^5.6 | ^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^5.7", - "squizlabs/php_codesniffer": "~2.1" - }, - "time": "2018-12-04T19:47:03+00:00", - "type": "library", - "installation-source": "dist", - "autoload": { - "psr-0": { - "Curl": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Hassan Amouhzi", - "email": "hassan@anezi.net", - "homepage": "http://hassan.amouhzi.com" - }, - { - "name": "php-curl-class", - "homepage": "https://github.com/php-curl-class" - }, - { - "name": "user52", - "homepage": "https://github.com/user52" - } - ], - "description": "cURL class for PHP", - "homepage": "https://github.com/php-mod/curl", - "keywords": [ - "curl", - "dot" - ] - }, - { - "name": "owner888/phpspider", - "version": "v2.1.6", - "version_normalized": "2.1.6.0", - "source": { - "type": "git", - "url": "https://github.com/owner888/phpspider.git", - "reference": "e6021148adec201418c16ba26f39bc013ba5b4d9" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/owner888/phpspider/zipball/e6021148adec201418c16ba26f39bc013ba5b4d9", - "reference": "e6021148adec201418c16ba26f39bc013ba5b4d9", - "shasum": "", - "mirrors": [ - { - "url": "https://mirrors.aliyun.com/composer/dists/%package%/%reference%.%type%", - "preferred": true - } - ] - }, - "require": { - "php": ">=5.5.0" - }, - "suggest": { - "ext-pcntl、ext-redis": "For better performance. " - }, - "time": "2018-08-15T08:04:29+00:00", - "type": "library", - "installation-source": "dist", - "autoload": { - "psr-4": { - "phpspider\\": "./" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Seatle Yang", - "email": "seatle@foxmail.com", - "homepage": "http://www.phpspider.org", - "role": "Developer" - } - ], - "description": "The PHPSpider Framework.", - "homepage": "http://www.phpspider.org", - "keywords": [ - "framework", - "phpspider" - ] - } -] diff --git a/vendor/curl/curl/.gitignore b/vendor/curl/curl/.gitignore deleted file mode 100644 index 0d8c4db..0000000 --- a/vendor/curl/curl/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -vendor/* -*.orig -.buildpath -.project -.settings/* -.idea/* -composer.lock -*~ -tests/phpunit_report/* -/.settings/ -/.php_cs.cache diff --git a/vendor/curl/curl/.gitlab-ci.yml b/vendor/curl/curl/.gitlab-ci.yml deleted file mode 100644 index ff2854f..0000000 --- a/vendor/curl/curl/.gitlab-ci.yml +++ /dev/null @@ -1,113 +0,0 @@ -stages: - - build - - test - -build-test-server: - image: docker:latest - stage: build - services: - - docker:dind - script: - - docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY - - docker build --pull -t "$CI_REGISTRY_IMAGE:server-test" tests/server - - docker push "$CI_REGISTRY_IMAGE:server-test" - only: - changes: - - tests/server - -tests-php5.6: - image: alpine:3.7 - stage: test - services: - - name: "$CI_REGISTRY_IMAGE:server-test" - alias: server_test - script: - - apk add --no-cache php5-cli php5-curl php5-gd php5-phar php5-json php5-openssl php5-dom php5-xml php5-zlib - - ln -s /usr/bin/php5 /usr/bin/php - - php --version - - if [ ! -f composer.phar ]; then DOWLOAD_COMPOSER=1 ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "if (hash_file('sha384', 'composer-setup.php') === '93b54496392c062774670ac18b134c3b3a95e5a5e5c8f1a9f115f203b75bf9a129d5daa8ba6a13e2cc8a1da0806388a8') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php composer-setup.php ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "unlink('composer-setup.php');" ; fi; - - php composer.phar install - - vendor/bin/phpcs --warning-severity=0 --standard=PSR2 src - - vendor/bin/phpunit - cache: - key: php5.6 - paths: - - composer.phar - - vendor - -tests-php7.0: - image: alpine:3.5 - stage: test - services: - - name: "$CI_REGISTRY_IMAGE:server-test" - alias: server_test - script: - - apk add --no-cache php7 php7-curl php7-gd php7-phar php7-json php7-openssl php7-dom php7-mbstring - - ln -s /usr/bin/php7 /usr/bin/php - - php --version - - if [ ! -f composer.phar ]; then DOWLOAD_COMPOSER=1 ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "if (hash_file('sha384', 'composer-setup.php') === '93b54496392c062774670ac18b134c3b3a95e5a5e5c8f1a9f115f203b75bf9a129d5daa8ba6a13e2cc8a1da0806388a8') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php composer-setup.php ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "unlink('composer-setup.php');" ; fi; - - php composer.phar install - - vendor/bin/phpcs --warning-severity=0 --standard=PSR2 src - - nohup php -S localhost:8000 -t tests/server/php-curl-test > phpd.log 2>&1 & - - vendor/bin/phpunit - cache: - key: php7.0 - paths: - - composer.phar - - vendor - -tests-php7.1: - image: alpine:3.7 - stage: test - services: - - name: "$CI_REGISTRY_IMAGE:server-test" - alias: server_test - script: - - apk add --no-cache php7-cli php7-curl php7-gd php7-phar php7-json php7-openssl php7-dom php7-simplexml php7-tokenizer php7-mbstring php7-xml - - php --version - - if [ ! -f composer.phar ]; then DOWLOAD_COMPOSER=1 ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "if (hash_file('sha384', 'composer-setup.php') === '93b54496392c062774670ac18b134c3b3a95e5a5e5c8f1a9f115f203b75bf9a129d5daa8ba6a13e2cc8a1da0806388a8') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php composer-setup.php ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "unlink('composer-setup.php');" ; fi; - - php composer.phar install - - vendor/bin/phpcs --warning-severity=0 --standard=PSR2 src - - nohup php -S localhost:8000 -t tests/server/php-curl-test > phpd.log 2>&1 & - - vendor/bin/phpunit - cache: - key: php7.1 - paths: - - composer.phar - - vendor - -tests-php7.2: - image: alpine:3.8 - stage: test - services: - - name: "$CI_REGISTRY_IMAGE:server-test" - alias: server_test - script: - - apk add --no-cache php7-cli php7-curl php7-gd php7-phar php7-json php7-openssl php7-dom php7-simplexml php7-tokenizer php7-mbstring php7-xml - - php --version - - if [ ! -f composer.phar ]; then DOWLOAD_COMPOSER=1 ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "if (hash_file('sha384', 'composer-setup.php') === '93b54496392c062774670ac18b134c3b3a95e5a5e5c8f1a9f115f203b75bf9a129d5daa8ba6a13e2cc8a1da0806388a8') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php composer-setup.php ; fi; - - if [ -n "$DOWLOAD_COMPOSER" ] ; then php -r "unlink('composer-setup.php');" ; fi; - - php composer.phar install - - vendor/bin/phpcs --warning-severity=0 --standard=PSR2 src - - nohup php -S localhost:8000 -t tests/server/php-curl-test > phpd.log 2>&1 & - - vendor/bin/phpunit - cache: - key: php7.2 - paths: - - composer.phar - - vendor diff --git a/vendor/curl/curl/LICENSE b/vendor/curl/curl/LICENSE deleted file mode 100644 index 670155c..0000000 --- a/vendor/curl/curl/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2013 php-mod - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/curl/curl/README.md b/vendor/curl/curl/README.md deleted file mode 100644 index 779fc33..0000000 --- a/vendor/curl/curl/README.md +++ /dev/null @@ -1,125 +0,0 @@ -# PHP Curl Class - -This library provides an object-oriented wrapper of the PHP cURL extension. - -If you have questions or problems with installation or usage [create an Issue](https://github.com/php-mod/curl/issues). - -## Installation - -In order to install this library via composer run the following command in the console: - -```sh -composer require curl/curl -``` - -or add the package manually to your composer.json file in the require section: - -```json -"curl/curl": "^2.0" -``` - -## Usage examples - -```php -$curl = new Curl\Curl(); -$curl->get('http://www.example.com/'); -``` - -```php -$curl = new Curl\Curl(); -$curl->get('http://www.example.com/search', array( - 'q' => 'keyword', -)); -``` - -```php -$curl = new Curl\Curl(); -$curl->post('http://www.example.com/login/', array( - 'username' => 'myusername', - 'password' => 'mypassword', -)); -``` - -```php -$curl = new Curl\Curl(); -$curl->setBasicAuthentication('username', 'password'); -$curl->setUserAgent(''); -$curl->setReferrer(''); -$curl->setHeader('X-Requested-With', 'XMLHttpRequest'); -$curl->setCookie('key', 'value'); -$curl->get('http://www.example.com/'); - -if ($curl->error) { - echo $curl->error_code; -} -else { - echo $curl->response; -} - -var_dump($curl->request_headers); -var_dump($curl->response_headers); -``` - -```php -$curl = new Curl\Curl(); -$curl->setOpt(CURLOPT_RETURNTRANSFER, TRUE); -$curl->setOpt(CURLOPT_SSL_VERIFYPEER, FALSE); -$curl->get('https://encrypted.example.com/'); -``` - -```php -$curl = new Curl\Curl(); -$curl->put('http://api.example.com/user/', array( - 'first_name' => 'Zach', - 'last_name' => 'Borboa', -)); -``` - -```php -$curl = new Curl\Curl(); -$curl->patch('http://api.example.com/profile/', array( - 'image' => '@path/to/file.jpg', -)); -``` - -```php -$curl = new Curl\Curl(); -$curl->delete('http://api.example.com/user/', array( - 'id' => '1234', -)); -``` - -```php -$curl->close(); -``` - -```php -// Example access to curl object. -curl_set_opt($curl->curl, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1'); -curl_close($curl->curl); -``` - -```php -// Example of downloading a file or any other content -$curl = new Curl\Curl(); -// open the file where the request response should be written -$file_handle = fopen($target_file, 'w+'); -// pass it to the curl resource -$curl->setOpt(CURLOPT_FILE, $file_handle); -// do any type of request -$curl->get('https://github.com'); -// disable writing to file -$curl->setOpt(CURLOPT_FILE, null); -// close the file for writing -fclose($file_handle); -``` - - -## Testing - -In order to test the library: - -1. Create a fork -2. Clone the fork to your machine -3. Install the depencies `composer install` -4. Run the unit tests `./vendor/bin/phpunit tests` diff --git a/vendor/curl/curl/composer.json b/vendor/curl/curl/composer.json deleted file mode 100644 index b12dde9..0000000 --- a/vendor/curl/curl/composer.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "curl/curl", - "description": "cURL class for PHP", - "keywords": ["dot", "curl"], - "homepage": "https://github.com/php-mod/curl", - "type": "library", - "license": "MIT", - "authors": [ - { - "name": "php-curl-class", - "homepage": "https://github.com/php-curl-class" - }, - { - "name": "Hassan Amouhzi", - "email": "hassan@anezi.net", - "homepage": "http://hassan.amouhzi.com" - }, - { - "name": "user52", - "homepage": "https://github.com/user52" - } - ], - "require": { - "php": "^5.6 | ^7.0", - "ext-curl": "*" - }, - "require-dev": { - "phpunit/phpunit": "^5.7", - "squizlabs/php_codesniffer": "~2.1" - }, - "autoload": { - "psr-0": { - "Curl": "src/" - } - } -} diff --git a/vendor/curl/curl/phpunit.xml.dist b/vendor/curl/curl/phpunit.xml.dist deleted file mode 100644 index 74f7d96..0000000 --- a/vendor/curl/curl/phpunit.xml.dist +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - tests - - - - diff --git a/vendor/curl/curl/src/Curl/Curl.php b/vendor/curl/curl/src/Curl/Curl.php deleted file mode 100644 index 60567a6..0000000 --- a/vendor/curl/curl/src/Curl/Curl.php +++ /dev/null @@ -1,719 +0,0 @@ -get('http://www.example.com/search', array( - * 'q' => 'keyword', - * )); - * ``` - * - * Example post request with post data: - * - * ```php - * $curl = new Curl\Curl(); - * $curl->post('http://www.example.com/login/', array( - * 'username' => 'myusername', - * 'password' => 'mypassword', - * )); - * ``` - * - * @see https://php.net/manual/curl.setup.php - */ -class Curl -{ - // The HTTP authentication method(s) to use. - - /** - * @var string Type AUTH_BASIC - */ - const AUTH_BASIC = CURLAUTH_BASIC; - - /** - * @var string Type AUTH_DIGEST - */ - const AUTH_DIGEST = CURLAUTH_DIGEST; - - /** - * @var string Type AUTH_GSSNEGOTIATE - */ - const AUTH_GSSNEGOTIATE = CURLAUTH_GSSNEGOTIATE; - - /** - * @var string Type AUTH_NTLM - */ - const AUTH_NTLM = CURLAUTH_NTLM; - - /** - * @var string Type AUTH_ANY - */ - const AUTH_ANY = CURLAUTH_ANY; - - /** - * @var string Type AUTH_ANYSAFE - */ - const AUTH_ANYSAFE = CURLAUTH_ANYSAFE; - - /** - * @var string The user agent name which is set when making a request - */ - const USER_AGENT = 'PHP Curl/1.9 (+https://github.com/php-mod/curl)'; - - private $_cookies = array(); - - private $_headers = array(); - - /** - * @var resource Contains the curl resource created by `curl_init()` function - */ - public $curl; - - /** - * @var bool Whether an error occured or not - */ - public $error = false; - - /** - * @var int Contains the error code of the curren request, 0 means no error happend - */ - public $error_code = 0; - - /** - * @var string If the curl request failed, the error message is contained - */ - public $error_message = null; - - /** - * @var bool Whether an error occured or not - */ - public $curl_error = false; - - /** - * @var int Contains the error code of the curren request, 0 means no error happend. - * @see https://curl.haxx.se/libcurl/c/libcurl-errors.html - */ - public $curl_error_code = 0; - - /** - * @var string If the curl request failed, the error message is contained - */ - public $curl_error_message = null; - - /** - * @var bool Whether an error occured or not - */ - public $http_error = false; - - /** - * @var int Contains the status code of the current processed request. - */ - public $http_status_code = 0; - - /** - * @var string If the curl request failed, the error message is contained - */ - public $http_error_message = null; - - /** - * @var string|array TBD (ensure type) Contains the request header informations - */ - public $request_headers = null; - - /** - * @var string|array TBD (ensure type) Contains the response header informations - */ - public $response_headers = array(); - - /** - * @var string Contains the response from the curl request - */ - public $response = null; - - /** - * @var bool Whether the current section of response headers is after 'HTTP/1.1 100 Continue' - */ - protected $response_header_continue = false; - - /** - * Constructor ensures the available curl extension is loaded. - * - * @throws \ErrorException - */ - public function __construct() - { - if (!extension_loaded('curl')) { - throw new \ErrorException('The cURL extensions is not loaded, make sure you have installed the cURL extension: https://php.net/manual/curl.setup.php'); - } - - $this->init(); - } - - // private methods - - /** - * Initializer for the curl resource. - * - * Is called by the __construct() of the class or when the curl request is reseted. - * @return self - */ - private function init() - { - $this->curl = curl_init(); - $this->setUserAgent(self::USER_AGENT); - $this->setOpt(CURLINFO_HEADER_OUT, true); - $this->setOpt(CURLOPT_HEADER, false); - $this->setOpt(CURLOPT_RETURNTRANSFER, true); - $this->setOpt(CURLOPT_HEADERFUNCTION, array($this, 'addResponseHeaderLine')); - return $this; - } - - /** - * Handle writing the response headers - * - * @param resource $curl The current curl resource - * @param string $header_line A line from the list of response headers - * - * @return int Returns the length of the $header_line - */ - public function addResponseHeaderLine($curl, $header_line) - { - $trimmed_header = trim($header_line, "\r\n"); - - if ($trimmed_header === "") { - $this->response_header_continue = false; - } elseif (strtolower($trimmed_header) === 'http/1.1 100 continue') { - $this->response_header_continue = true; - } elseif (!$this->response_header_continue) { - $this->response_headers[] = $trimmed_header; - } - - return strlen($header_line); - } - - // protected methods - - /** - * Execute the curl request based on the respectiv settings. - * - * @return int Returns the error code for the current curl request - */ - protected function exec() - { - $this->response_headers = array(); - $this->response = curl_exec($this->curl); - $this->curl_error_code = curl_errno($this->curl); - $this->curl_error_message = curl_error($this->curl); - $this->curl_error = !($this->curl_error_code === 0); - $this->http_status_code = curl_getinfo($this->curl, CURLINFO_HTTP_CODE); - $this->http_error = in_array(floor($this->http_status_code / 100), array(4, 5)); - $this->error = $this->curl_error || $this->http_error; - $this->error_code = $this->error ? ($this->curl_error ? $this->curl_error_code : $this->http_status_code) : 0; - $this->request_headers = preg_split('/\r\n/', curl_getinfo($this->curl, CURLINFO_HEADER_OUT), null, PREG_SPLIT_NO_EMPTY); - $this->http_error_message = $this->error ? (isset($this->response_headers['0']) ? $this->response_headers['0'] : '') : ''; - $this->error_message = $this->curl_error ? $this->curl_error_message : $this->http_error_message; - - return $this->error_code; - } - - /** - * @param array|object|string $data - */ - protected function preparePayload($data) - { - $this->setOpt(CURLOPT_POST, true); - - if (is_array($data) || is_object($data)) { - $skip = false; - foreach ($data as $key => $value) { - // If a value is an instance of CurlFile skip the http_build_query - // see issue https://github.com/php-mod/curl/issues/46 - // suggestion from: https://stackoverflow.com/a/36603038/4611030 - if ($value instanceof \CurlFile) { - $skip = true; - } - } - - if (!$skip) { - $data = http_build_query($data); - } - } - - $this->setOpt(CURLOPT_POSTFIELDS, $data); - } - - /** - * Set auth options for the current request. - * - * Available auth types are: - * - * + self::AUTH_BASIC - * + self::AUTH_DIGEST - * + self::AUTH_GSSNEGOTIATE - * + self::AUTH_NTLM - * + self::AUTH_ANY - * + self::AUTH_ANYSAFE - * - * @param int $httpauth The type of authentication - */ - protected function setHttpAuth($httpauth) - { - $this->setOpt(CURLOPT_HTTPAUTH, $httpauth); - } - - // public methods - - /** - * @deprecated calling exec() directly is discouraged - */ - public function _exec() - { - return $this->exec(); - } - - // functions - - /** - * Make a get request with optional data. - * - * The get request has no body data, the data will be correctly added to the $url with the http_build_query() method. - * - * @param string $url The url to make the get request for - * @param array $data Optional arguments who are part of the url - * @return self - */ - public function get($url, $data = array()) - { - if (count($data) > 0) { - $this->setOpt(CURLOPT_URL, $url.'?'.http_build_query($data)); - } else { - $this->setOpt(CURLOPT_URL, $url); - } - $this->setOpt(CURLOPT_HTTPGET, true); - $this->exec(); - return $this; - } - - /** - * Make a post request with optional post data. - * - * @param string $url The url to make the post request - * @param array $data Post data to pass to the url - * @return self - */ - public function post($url, $data = array()) - { - $this->setOpt(CURLOPT_URL, $url); - $this->preparePayload($data); - $this->exec(); - return $this; - } - - /** - * Make a put request with optional data. - * - * The put request data can be either sent via payload or as get paramters of the string. - * - * @param string $url The url to make the put request - * @param array $data Optional data to pass to the $url - * @param bool $payload Whether the data should be transmitted trough payload or as get parameters of the string - * @return self - */ - public function put($url, $data = array(), $payload = false) - { - if (! empty($data)) { - if ($payload === false) { - $url .= '?'.http_build_query($data); - } else { - $this->preparePayload($data); - } - } - - $this->setOpt(CURLOPT_URL, $url); - $this->setOpt(CURLOPT_CUSTOMREQUEST, 'PUT'); - $this->exec(); - return $this; - } - - /** - * Make a patch request with optional data. - * - * The patch request data can be either sent via payload or as get paramters of the string. - * - * @param string $url The url to make the patch request - * @param array $data Optional data to pass to the $url - * @param bool $payload Whether the data should be transmitted trough payload or as get parameters of the string - * @return self - */ - public function patch($url, $data = array(), $payload = false) - { - if (! empty($data)) { - if ($payload === false) { - $url .= '?'.http_build_query($data); - } else { - $this->preparePayload($data); - } - } - - $this->setOpt(CURLOPT_URL, $url); - $this->setOpt(CURLOPT_CUSTOMREQUEST, 'PATCH'); - $this->exec(); - return $this; - } - - /** - * Make a delete request with optional data. - * - * @param string $url The url to make the delete request - * @param array $data Optional data to pass to the $url - * @param bool $payload Whether the data should be transmitted trough payload or as get parameters of the string - * @return self - */ - public function delete($url, $data = array(), $payload = false) - { - if (! empty($data)) { - if ($payload === false) { - $url .= '?'.http_build_query($data); - } else { - $this->preparePayload($data); - } - } - - $this->setOpt(CURLOPT_URL, $url); - $this->setOpt(CURLOPT_CUSTOMREQUEST, 'DELETE'); - $this->exec(); - return $this; - } - - // setters - - /** - * Pass basic auth data. - * - * If the the rquested url is secured by an httaccess basic auth mechanism you can use this method to provided the auth data. - * - * ```php - * $curl = new Curl(); - * $curl->setBasicAuthentication('john', 'doe'); - * $curl->get('http://example.com/secure.php'); - * ``` - * - * @param string $username The username for the authentification - * @param string $password The password for the given username for the authentification - * @return self - */ - public function setBasicAuthentication($username, $password) - { - $this->setHttpAuth(self::AUTH_BASIC); - $this->setOpt(CURLOPT_USERPWD, $username.':'.$password); - return $this; - } - - /** - * Provide optional header informations. - * - * In order to pass optional headers by key value pairing: - * - * ```php - * $curl = new Curl(); - * $curl->setHeader('X-Requested-With', 'XMLHttpRequest'); - * $curl->get('http://example.com/request.php'); - * ``` - * - * @param string $key The header key - * @param string $value The value for the given header key - * @return self - */ - public function setHeader($key, $value) - { - $this->_headers[$key] = $key.': '.$value; - $this->setOpt(CURLOPT_HTTPHEADER, array_values($this->_headers)); - return $this; - } - - /** - * Provide a User Agent. - * - * In order to provide you cusomtized user agent name you can use this method. - * - * ```php - * $curl = new Curl(); - * $curl->setUserAgent('My John Doe Agent 1.0'); - * $curl->get('http://example.com/request.php'); - * ``` - * - * @param string $useragent The name of the user agent to set for the current request - * @return self - */ - public function setUserAgent($useragent) - { - $this->setOpt(CURLOPT_USERAGENT, $useragent); - return $this; - } - - /** - * @deprecated Call setReferer() instead - */ - public function setReferrer($referrer) - { - $this->setReferer($referrer); - return $this; - } - - /** - * Set the HTTP referer header. - * - * The $referer informations can help identify the requested client where the requested was made. - * - * @param string $referer An url to pass and will be set as referer header - * @return self - */ - public function setReferer($referer) - { - $this->setOpt(CURLOPT_REFERER, $referer); - return $this; - } - - /** - * Set contents of HTTP Cookie header. - * - * @param string $key The name of the cookie - * @param string $value The value for the provided cookie name - * @return self - */ - public function setCookie($key, $value) - { - $this->_cookies[$key] = $value; - $this->setOpt(CURLOPT_COOKIE, http_build_query($this->_cookies, '', '; ')); - return $this; - } - - /** - * Set customized curl options. - * - * To see a full list of options: http://php.net/curl_setopt - * - * @see http://php.net/curl_setopt - * - * @param int $option The curl option constante e.g. `CURLOPT_AUTOREFERER`, `CURLOPT_COOKIESESSION` - * @param mixed $value The value to pass for the given $option - */ - public function setOpt($option, $value) - { - return curl_setopt($this->curl, $option, $value); - } - - /** - * Get customized curl options. - * - * To see a full list of options: http://php.net/curl_getinfo - * - * @see http://php.net/curl_getinfo - * - * @param int $option The curl option constante e.g. `CURLOPT_AUTOREFERER`, `CURLOPT_COOKIESESSION` - * @param mixed $value The value to check for the given $option - */ - public function getOpt($option) - { - return curl_getinfo($this->curl, $option); - } - - /** - * Return the endpoint set for curl - * - * @see http://php.net/curl_getinfo - * - * @return string of endpoint - */ - public function getEndpoint() - { - return $this->getOpt(CURLINFO_EFFECTIVE_URL); - } - - /** - * Enable verbositiy. - * - * @todo As to keep naming convention it should be renamed to `setVerbose()` - * - * @param string $on - * @return self - */ - public function verbose($on = true) - { - $this->setOpt(CURLOPT_VERBOSE, $on); - return $this; - } - - /** - * Reset all curl options. - * - * In order to make multiple requests with the same curl object all settings requires to be reset. - * @return self - */ - public function reset() - { - $this->close(); - $this->_cookies = array(); - $this->_headers = array(); - $this->error = false; - $this->error_code = 0; - $this->error_message = null; - $this->curl_error = false; - $this->curl_error_code = 0; - $this->curl_error_message = null; - $this->http_error = false; - $this->http_status_code = 0; - $this->http_error_message = null; - $this->request_headers = null; - $this->response_headers = array(); - $this->response = null; - $this->init(); - return $this; - } - - /** - * Closing the current open curl resource. - * @return self - */ - public function close() - { - if (is_resource($this->curl)) { - curl_close($this->curl); - } - return $this; - } - - /** - * Close the connection when the Curl object will be destroyed. - */ - public function __destruct() - { - $this->close(); - } - - /** - * Was an 'info' header returned. - * @return bool - */ - public function isInfo() - { - return $this->http_status_code >= 100 && $this->http_status_code < 200; - } - - /** - * Was an 'OK' response returned. - * @return bool - */ - public function isSuccess() - { - return $this->http_status_code >= 200 && $this->http_status_code < 300; - } - - /** - * Was a 'redirect' returned. - * @return bool - */ - public function isRedirect() - { - return $this->http_status_code >= 300 && $this->http_status_code < 400; - } - - /** - * Was an 'error' returned (client error or server error). - * @return bool - */ - public function isError() - { - return $this->http_status_code >= 400 && $this->http_status_code < 600; - } - - /** - * Was a 'client error' returned. - * @return bool - */ - public function isClientError() - { - return $this->http_status_code >= 400 && $this->http_status_code < 500; - } - - /** - * Was a 'server error' returned. - * @return bool - */ - public function isServerError() - { - return $this->http_status_code >= 500 && $this->http_status_code < 600; - } - - /** - * Get a specific response header key or all values from the response headers array. - * - * Usage example: - * - * ```php - * $curl = (new Curl())->get('http://example.com'); - * - * echo $curl->getResponseHeaders('Content-Type'); - * ``` - * - * Or in order to dump all keys with the given values use: - * - * ```php - * $curl = (new Curl())->get('http://example.com'); - * - * var_dump($curl->getResponseHeaders()); - * ``` - * - * @param string $headerKey Optional key to get from the array. - * @return bool|string - * @since 1.9 - */ - public function getResponseHeaders($headerKey = null) - { - $headers = array(); - $headerKey = strtolower($headerKey); - - foreach ($this->response_headers as $header) { - $parts = explode(":", $header, 2); - - $key = isset($parts[0]) ? $parts[0] : null; - $value = isset($parts[1]) ? $parts[1] : null; - - $headers[trim(strtolower($key))] = trim($value); - } - - if ($headerKey) { - return isset($headers[$headerKey]) ? $headers[$headerKey] : false; - } - - return $headers; - } - - public function getResponse() - { - return $this->response; - } - - public function getErrorCode() - { - return $this->curl_error_code; - } - - public function getErrorMessage() - { - return $this->curl_error_message; - } - - public function getHttpStatus() - { - return $this->http_status_code; - } -} diff --git a/vendor/curl/curl/tests/CurlTest.php b/vendor/curl/curl/tests/CurlTest.php deleted file mode 100644 index bad7bac..0000000 --- a/vendor/curl/curl/tests/CurlTest.php +++ /dev/null @@ -1,277 +0,0 @@ -curl = new Curl(); - $this->curl->setOpt(CURLOPT_SSL_VERIFYPEER, FALSE); - $this->curl->setOpt(CURLOPT_SSL_VERIFYHOST, FALSE); - } - - function server($request_method, $data='') { - $request_method = strtolower($request_method); - $this->curl->$request_method(self::TEST_URL . '/server.php', $data); - return $this->curl->response; - } - - public function testExtensionLoaded() { - - $this->assertTrue(extension_loaded('curl')); - } - - public function testUserAgent() { - - $this->curl->setUserAgent(Curl::USER_AGENT); - $this->assertEquals(Curl::USER_AGENT, $this->server('GET', array( - 'test' => 'server', - 'key' => 'HTTP_USER_AGENT', - ))); - - } - - public function testGet() { - $this->assertTrue($this->server('GET', array( - 'test' => 'server', - 'key' => 'REQUEST_METHOD', - )) === 'GET'); - } - - public function testPostRequestMethod() { - $this->assertTrue($this->server('POST', array( - 'test' => 'server', - 'key' => 'REQUEST_METHOD', - )) === 'POST'); - } - - public function testPostData() { - $this->assertTrue($this->server('POST', array( - 'test' => 'post', - 'key' => 'test', - )) === 'post'); - } - - public function testPostMultidimensionalData() { - - $data = array( - 'key' => 'file', - 'file' => array( - 'wibble', - 'wubble', - 'wobble', - ), - ); - - $this->curl->post(self::TEST_URL . '/post_multidimensional.php', $data); - - $this->assertEquals( - 'key=file&file%5B0%5D=wibble&file%5B1%5D=wubble&file%5B2%5D=wobble', - $this->curl->response); - - } - - public function testPostFilePathUpload() - { - - $file_path = $this->get_png(); - - $data = array( - 'key' => 'image', - 'image' => '@' . $file_path, - ); - - $this->curl->setOpt(CURLOPT_RETURNTRANSFER, true); - - $this->curl->post(self::TEST_URL . '/post_file_path_upload.php', $data); - - $this->assertEquals( - array( - 'request_method' => 'POST', - 'key' => 'image', - 'mime_content_type' => 'ERROR', // Temp change the image response, but assuming this is not fixing the issue indeed. - //'mime_content_type' => 'image/png' - ), - json_decode($this->curl->response, true)); - - unlink($file_path); - } - - public function testPutRequestMethod() { - $this->assertTrue($this->server('PUT', array( - 'test' => 'server', - 'key' => 'REQUEST_METHOD', - )) === 'PUT'); - } - - public function testPutData() { - $this->assertTrue($this->server('PUT', array( - 'test' => 'put', - 'key' => 'test', - )) === 'put'); - } - - public function testPutFileHandle() { - $png = $this->create_png(); - $tmp_file = $this->create_tmp_file($png); - - $this->curl->setOpt(CURLOPT_PUT, TRUE); - $this->curl->setOpt(CURLOPT_INFILE, $tmp_file); - $this->curl->setOpt(CURLOPT_INFILESIZE, strlen($png)); - $this->curl->put(self::TEST_URL . '/server.php', array( - 'test' => 'put_file_handle', - )); - - fclose($tmp_file); - - $this->assertTrue($this->curl->response === 'image/png'); - } - - public function testDelete() { - $this->assertTrue($this->server('DELETE', array( - 'test' => 'server', - 'key' => 'REQUEST_METHOD', - )) === 'DELETE'); - - $this->assertTrue($this->server('DELETE', array( - 'test' => 'delete', - 'key' => 'test', - )) === 'delete'); - } - - public function testBasicHttpAuth() { - - $data = array(); - - $this->curl->get(self::TEST_URL . '/http_basic_auth.php', $data); - - $this->assertEquals('canceled', $this->curl->response); - - $username = 'myusername'; - $password = 'mypassword'; - - $this->curl->setBasicAuthentication($username, $password); - - $this->curl->get(self::TEST_URL . '/http_basic_auth.php', $data); - - $this->assertEquals( - '{"username":"myusername","password":"mypassword"}', - $this->curl->response); - } - - public function testReferrer() { - $this->curl->setReferer('myreferrer'); - $this->assertTrue($this->server('GET', array( - 'test' => 'server', - 'key' => 'HTTP_REFERER', - )) === 'myreferrer'); - } - - public function testDeprecatedReferrer() { - $this->curl->setReferrer('myreferrer'); - $this->assertTrue($this->server('GET', array( - 'test' => 'server', - 'key' => 'HTTP_REFERER', - )) === 'myreferrer'); - } - - public function testCookies() { - $this->curl->setCookie('mycookie', 'yum'); - $this->assertTrue($this->server('GET', array( - 'test' => 'cookie', - 'key' => 'mycookie', - )) === 'yum'); - } - - public function testError() { - $this->curl->setOpt(CURLOPT_CONNECTTIMEOUT_MS, 2000); - $this->curl->get('http://1.2.3.4/'); - $this->assertTrue($this->curl->error === TRUE); - $this->assertTrue($this->curl->curl_error === TRUE); - $this->assertTrue($this->curl->curl_error_code === CURLE_OPERATION_TIMEOUTED); - } - - public function testHeaders() { - $this->curl->setHeader('Content-Type', 'application/json'); - $this->curl->setHeader('X-Requested-With', 'XMLHttpRequest'); - $this->curl->setHeader('Accept', 'application/json'); - $this->assertTrue($this->server('GET', array( - 'test' => 'server', - 'key' => 'CONTENT_TYPE', - )) === 'application/json'); - $this->assertTrue($this->server('GET', array( - 'test' => 'server', - 'key' => 'HTTP_X_REQUESTED_WITH', - )) === 'XMLHttpRequest'); - $this->assertTrue($this->server('GET', array( - 'test' => 'server', - 'key' => 'HTTP_ACCEPT', - )) === 'application/json'); - } - - public function testHeadersWithContinue() { - $headers = file(dirname(__FILE__) . '/data/response_headers_with_continue.txt'); - - $this->curl->response_headers = array(); - foreach($headers as $header_line) { - $this->curl->addResponseHeaderLine(null, $header_line); - } - - $expected_headers = array_values(array_filter(array_map(function($l) { return trim($l, "\r\n"); }, array_slice($headers, 1)))); - - $this->assertEquals($expected_headers, $this->curl->response_headers); - } - - public function testReset() - { - $curl = $this->getMockBuilder(get_class($this->curl))->getMock(); - $curl->expects($this->once())->method('reset')->with(); - // lets make small request - $curl->setOpt(CURLOPT_CONNECTTIMEOUT_MS, 2000); - $curl->get('http://1.2.3.4/'); - $curl->reset(); - $this->assertFalse($curl->error); - $this->assertSame(0, $curl->error_code); - $this->assertNull($curl->error_message); - $this->assertFalse($curl->curl_error); - $this->assertSame(0, $curl->curl_error_code); - $this->assertNull($curl->curl_error_message); - $this->assertFalse($curl->http_error); - $this->assertSame(0, $curl->http_status_code); - $this->assertNull($curl->http_error_message); - $this->assertNull($curl->request_headers); - $this->assertEmpty($curl->response_headers); - $this->assertNull($curl->response); - } - - function create_png() { - // PNG image data, 1 x 1, 1-bit colormap, non-interlaced - ob_start(); - imagepng(imagecreatefromstring(base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7'))); - $raw_image = ob_get_contents(); - ob_end_clean(); - return $raw_image; - } - - function create_tmp_file($data) { - $tmp_file = tmpfile(); - fwrite($tmp_file, $data); - rewind($tmp_file); - return $tmp_file; - } - - function get_png() { - $tmp_filename = tempnam('/tmp', 'php-curl-class.'); - file_put_contents($tmp_filename, $this->create_png()); - return $tmp_filename; - } -} diff --git a/vendor/curl/curl/tests/data/response_headers_with_continue.txt b/vendor/curl/curl/tests/data/response_headers_with_continue.txt deleted file mode 100644 index de80fc1..0000000 --- a/vendor/curl/curl/tests/data/response_headers_with_continue.txt +++ /dev/null @@ -1,13 +0,0 @@ -HTTP/1.1 100 Continue - -HTTP/1.1 200 OK -Server: nginx/1.1.19 -Date: Fri, 11 Aug 2017 13:22:00 GMT -Content-Type: image/jpeg -Content-Length: 62574 -Connection: close -Cache-Control: max-age=7257600 -Expires: Fri, 03 Nov 2017 13:22:00 GMT -Strict-Transport-Security: max-age=31536000; includeSubDomains -X-Frame-Option: DENY - diff --git a/vendor/curl/curl/tests/data/test.png b/vendor/curl/curl/tests/data/test.png deleted file mode 100644 index 3b7a6907ee45022b84092dc6b3ed0a65580fcd9f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2855 zcmZuzdpy)>7yr%0jLWY^uVO|lWy!5I7g3U->1MSO+mdR`Fk{+w3#B&^Gt4wHBOz(r zr_b)2w#2M#QJB#%R5n7JTGFgrLdJbCe!qA6fB!hoInVRQ_j5kqbDnd)89{;j*BDzG z0|2nb|G=JLy(fM#LyZ0mMJ}c59h&3!ogV;HwRswd#`JeW{NAwmkeKNB3(T`@z>gIZ z$+q=B&E&9y*-X~OxMyr{05Goe-{bdv!n;Tv3{#as7y!|{y=NC->Pvf%0ub&){}#4= z^@|z6#9yIzEl;8Y2fs->XZU#}=-I{71_souq1+4htH}%>bJ-B_N5`X#P3+`l=elL% zlWTv`0$crJj9c6ve4KJWWaHD~c;{V0V^UJx%ejh#sS6c`#l(ZL6MYHEQw;{D-HbIy z8M11GH61(GALti%n8G~Fq}e{PK4I+({0;`U-qXReTrUbXU0j`O#eZ=3Kguyq2jP9g z@*QENxh)S17?+Vci&>dSwMQlvGH55x$a zj&Hss`c&8j{(kp4cxHDb7!ctd8J0NWrF@b!)kvaL3xI%JlaGlAYLfV21sI`;knpgwqP9X7v{FQtF z-a?wKJ{E9KOcabclC<9*AYUO?Qzn+RxlHKo-(XctJ>04*+kukY3th#UkAZTM$#*=! z(hw(u;nr*eaNg2~K;IN{!b!-m{}Ds;@xw5-DrJ-XCc=WzGbC^MTz=}bXVf~BOSgB> z@94W?ysW1VKnTVV`!MEG10RLJslBbO*2rO@M%F3^O?+G7$O;yVU981@K9%i8T!S{W1#v_u%9X+9Oh!ktS=#v} z9I>AyK+ZQvYgpLFZpJ4Qaq0L+zZPZ@X_CjT{L50J(ni|c!Q~RpBriReefWnaEFT>p z5mPvlX9`sh;=elXzIq1~+Ej>zU}mPpkztN$$Pl|WdE}2j7jIJkLf+FbFf+S0El=TS zYwTRkl}gDt{^d$XnzVR5-}|PivO~@#Nbp>u_tNrJ=I7yb@WRW+D2DcVtsvvFeUTc6 zLUr4iSKP`10i+?tXq63V7A;WxKYq1ERgoylu_(6+sD7m(KPRKf!f_?5=AWD!N{#wn^bgqwzy`IfxaE6##XvMQLPEII=wJO;MRfw>t{5?sR1ar;IHinR8d*>|j zO9_TG+Ym@YCCU{(I+Btw_hMv0Tm?3ric|zGx5m!TCEXive__B49#rO&;Do6O9iu*j@p_(P3!kCq{zDuV5

%b7-H0teYx%>*@3y_%6kt4W++=B9yOPV5e&jTFzsV^M-E&MU)?9d0gUYe-% zJye0l*m&l5i4piuJ7AbN$+j4+33)KYxk9-fZJ1ADnKFMsdRc`z0^JTo0l^2l*ed(< zGC+8)+=rw~hG26F<(%UnSM^Bh-k}qhqz-Sv24=3+*RcqT=e=GKabxiBPc*I;sJqhB z!H38cFz%ftc-P@L=&GBb!(2+G!FlWK@N1_OHI1T->+EXTH;;Jy23y+QQZN3EY?5rr z?YU-fDn>W}r;v08%TWK+8*t<2MquQA2Pn6J8k(A$*Q4G4@u(Z#sHZuBXVk-7YMsej z!p*84;ULPy#j_>q`KfZS@@WpZPVp|dDbGw&+}oGBmJUv|9{~97X==|wKaax;L3_`g z5eB)e6XG|}FdG6$o*~Oj87%3J4x?ishRN+1PlbT}ZnnQ&Vi%=sYk|9E<`i^|N zEYN@9oI4VTs1#K`gXxrgeTmwl*|ecJ42O9hb8J(L&?fvR!e6V&&om#kF$3}`sDv@Z z${qcI71i0pVaA5$MRpFqVKX2%0Gy8jxx9jToGFCzg2v@H!6!BitnKG#u*#}qTsfIS z-VadPk1}#m`CS5Wz;VEqM|7!%HO?#E$_v8&swtw@Iwaw#^u+~nYMFm(6TMfgu3GRA zzNXU2_4Nx8^q`E4rajlMHXZrlhF{?cvoL^6(J{38AsOKdw#$5VlkiVMen%;(c>5|} z-8wIqOC%Bs@9G2ohvyr@4wHgS_1j87KyeGQ1M=7~ow(eepyR9ahxTHu>5Qadxh?;X zY%}GS$r6xr*@q}OU__LNPmwf~7ktWC89X4(f(_#JGkprClZq4PC2GxB1UOI$!7X!T zJ8->J`tJJ=jr4U?DIx@>c6Q1UxQy^kV_nl7Zn)Zk?_Rzch2=XEn-lO8Et7aqKqqS+ zh!A3WpfHAOdkeCVu%I*=gd+|`kc-g|$8aU9FEqneo%Pgqh3f4<0lcZi?QX&s&Z2-k zfoaPNPuo)MxRbK}1&&L`i~5*BLqlxsns`UM&R1!!GYW87tR98k7+6^U8t4J1P8QjK zG5D8A8q`(?`?gY2Eji~EAB&zFeml!;J2d-I4=ww8Q*`{TtFRBnCvl1A|5^0Z4io2O zz))rySardv1~AoDDDXnVsEdQ}exb|MS7fUvnrrG=0>f@;1+L4ifD6}c4cHg#e60Qp zS`W(gqt?1Jq&sC7gIR0zGu?Y7E?xabUZw~h*{Mf)Mdh{F%iQM)vaP`+}3 z2|mS4HbeKcCO0R8WYXK1+Jj#o^}Swggv|hy+inEd4Q2k@hK@6^S*`v26~9A4roC1S zbov4{)|I#WtM6M=n6-;zPD&%jGw3sZCu(q~>a`;K5c2k&h=X7S*!x(JhvWfIH2}pN z2Q)>gs9=%&J?C0O<#K^v?l=jByN<7s=uD;!iLa?Tq^olkkzBv4sYRhJjC1Rpc7kD2 zauhm$4&?sW-GDN*b@-XYTSFlc4@D-nEYA+xtrhMsiy&WGStJx@^4Kf#D)5@zhq*n8 zpQB#&{|ciTbqysH6l~`CubSOoDcASo+dOTDz?woVVd|dOkB)B*6t1|zC?Rq30GY9$zNwXGcb(|I35qxAtQ<+VV7ip~5VSUqIBTnUp0-iTXxw z5(e>54wk|7oLY!ipq@8EU{u(juMrC1bfPY*-l*~w3~{9My`H|nx*Hiemt+~!W`j3k zJQL2Bx+F(y?X<<}gwz1#iEQc-4%hPFP;I#HFZRBkizM>uF{I_T zsAwtOEH&(l@=OsK+46+-SiLrDOVT&nzlI9HYB8I3zRt^BuXF+by@7iwcQexd4;dX$ A@c;k- diff --git a/vendor/curl/curl/tests/server/Dockerfile b/vendor/curl/curl/tests/server/Dockerfile deleted file mode 100644 index 96689f8..0000000 --- a/vendor/curl/curl/tests/server/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM alpine:3.7 - -RUN apk add --no-cache php5-cli php5-curl php5-gd php5-phar php5-json php5-openssl php5-dom - -COPY php-curl-test php-curl-test - -EXPOSE 80 - -CMD ["php5", "-S", "0.0.0.0:80", "-t", "php-curl-test"] diff --git a/vendor/curl/curl/tests/server/php-curl-test/deploy.php b/vendor/curl/curl/tests/server/php-curl-test/deploy.php deleted file mode 100644 index 0d8bdab..0000000 --- a/vendor/curl/curl/tests/server/php-curl-test/deploy.php +++ /dev/null @@ -1,37 +0,0 @@ -\$ {$command}\n"; - $output .= htmlentities(trim($tmp)) . "\n"; -} - -// Make it pretty for manual user access (and why not?) -?> - - - - - GIT DEPLOYMENT SCRIPT - - -

-. ____ . ____________________________
-|/ \| | |
-[| ♥ ♥ |] | Git Deployment Script v0.1 |
-|___==___| / © oodavid 2012 |
-|____________________________|
-
-    
-
- - \ No newline at end of file diff --git a/vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php b/vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php deleted file mode 100644 index 336fb0c..0000000 --- a/vendor/curl/curl/tests/server/php-curl-test/http_basic_auth.php +++ /dev/null @@ -1,14 +0,0 @@ - $_SERVER['PHP_AUTH_USER'], - 'password' => $_SERVER['PHP_AUTH_PW'], -)); \ No newline at end of file diff --git a/vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php b/vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php deleted file mode 100644 index aa54477..0000000 --- a/vendor/curl/curl/tests/server/php-curl-test/post_file_path_upload.php +++ /dev/null @@ -1,21 +0,0 @@ - '_COOKIE', - 'delete' => '_GET', - 'post' => '_POST', - 'put' => '_GET', - 'server' => '_SERVER', -); - -if(isset($data_mapping[$test])) { - $data = ${$data_mapping[$test]}; - $value = isset($data[$key]) ? $data[$key] : ''; -echo $value; -} else { - echo "Error."; -} diff --git a/vendor/owner888/phpspider/README.md b/vendor/owner888/phpspider/README.md deleted file mode 100644 index 0732cb2..0000000 --- a/vendor/owner888/phpspider/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# phpspider -- PHP蜘蛛爬虫框架 -《我用爬虫一天时间“偷了”知乎一百万用户,只为证明PHP是世界上最好的语言 》所使用的程序 - -phpspider是一个爬虫开发框架。使用本框架,你不用了解爬虫的底层技术实现,爬虫被网站屏蔽、有些网站需要登录或验证码识别才能爬取等问题。简单几行PHP代码,就可以创建自己的爬虫,利用框架封装的多进程Worker类库,代码更简洁,执行效率更高速度更快。 - -demo目录下有一些特定网站的爬取规则,只要你安装了PHP环境,代码就可以在命令行下直接跑。 对爬虫感兴趣的开发者可以加QQ群一起讨论:147824717。 - -下面以糗事百科为例, 来看一下我们的爬虫长什么样子: - -``` -$configs = array( - 'name' => '糗事百科', - 'domains' => array( - 'qiushibaike.com', - 'www.qiushibaike.com' - ), - 'scan_urls' => array( - 'http://www.qiushibaike.com/' - ), - 'content_url_regexes' => array( - "http://www.qiushibaike.com/article/\d+" - ), - 'list_url_regexes' => array( - "http://www.qiushibaike.com/8hr/page/\d+\?s=\d+" - ), - 'fields' => array( - array( - // 抽取内容页的文章内容 - 'name' => "article_content", - 'selector' => "//*[@id='single-next-link']", - 'required' => true - ), - array( - // 抽取内容页的文章作者 - 'name' => "article_author", - 'selector' => "//div[contains(@class,'author')]//h2", - 'required' => true - ), - ), -); -$spider = new phpspider($configs); -$spider->start(); -``` -爬虫的整体框架就是这样, 首先定义了一个$configs数组, 里面设置了待爬网站的一些信息, 然后通过调用```$spider = new phpspider($configs);```和```$spider->start();```来配置并启动爬虫. - -#### 运行界面如下: - -![](http://www.epooll.com/zhihu/pachong.gif) - -更多详细内容,移步到: - -[开发文档](http://doc.phpspider.org) diff --git a/vendor/owner888/phpspider/autoloader.php b/vendor/owner888/phpspider/autoloader.php deleted file mode 100644 index 6870644..0000000 --- a/vendor/owner888/phpspider/autoloader.php +++ /dev/null @@ -1,77 +0,0 @@ - - * @copyright seatle - * @link http://www.phpspider.org/ - * @license http://www.opensource.org/licenses/mit-license.php MIT License - */ -namespace phpspider; - -/** - * autoloader. - */ -class autoloader -{ - /** - * Autoload root path. - * - * @var string - */ - protected static $_autoload_root_path = ''; - - /** - * Set autoload root path. - * - * @param string $root_path - * @return void - */ - public static function set_root_path($root_path) - { - self::$_autoload_root_path = $root_path; - } - - /** - * Load files by namespace. - * - * @param string $name - * @return boolean - */ - public static function load_by_namespace($name) - { - $class_path = str_replace('\\', DIRECTORY_SEPARATOR, $name); - - if (strpos($name, 'phpspider\\') === 0) - { - $class_file = __DIR__ . substr($class_path, strlen('phpspider')) . '.php'; - } - else - { - if (self::$_autoload_root_path) - { - $class_file = self::$_autoload_root_path . DIRECTORY_SEPARATOR . $class_path . '.php'; - } - if (empty($class_file) || !is_file($class_file)) - { - $class_file = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . "$class_path.php"; - } - } - - if (is_file($class_file)) - { - require_once($class_file); - if (class_exists($name, false)) - { - return true; - } - } - return false; - } -} - -spl_autoload_register('\phpspider\autoloader::load_by_namespace'); diff --git a/vendor/owner888/phpspider/composer.json b/vendor/owner888/phpspider/composer.json deleted file mode 100644 index da533c7..0000000 --- a/vendor/owner888/phpspider/composer.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "name": "owner888/phpspider", - "type": "library", - "keywords": [ - "framework", - "phpspider" - ], - "homepage": "http://www.phpspider.org", - "license": "MIT", - "description": "The PHPSpider Framework.", - "authors": [ - { - "name": "Seatle Yang", - "email": "seatle@foxmail.com", - "homepage": "http://www.phpspider.org", - "role": "Developer" - } - ], - "support": { - "email": "seatle@foxmail.com", - "issues": "https://github.com/owner888/phpspider/issues", - "forum": "http://wenda.phpspider.org/", - "wiki": "http://doc.phpspider.org/", - "source": "https://github.com/owner888/phpspider" - }, - "require": { - "php": ">=5.5.0" - }, - "suggest": { - "ext-pcntl、ext-redis": "For better performance. " - }, - "autoload": { - "psr-4": { - "phpspider\\": "./" - } - }, - "minimum-stability": "dev" -} diff --git a/vendor/owner888/phpspider/core/cache.php b/vendor/owner888/phpspider/core/cache.php deleted file mode 100644 index a6ed009..0000000 --- a/vendor/owner888/phpspider/core/cache.php +++ /dev/null @@ -1,64 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider缓存类文件 -//---------------------------------- - -class cache -{ - // 多进程下面不能用单例模式 - //protected static $_instance; - /** - * 获取实例 - * - * @return void - * @author seatle - * @created time :2016-04-10 22:55 - */ - public static function init() - { - if(extension_loaded('Redis')) - { - $_instance = new Redis(); - } - else - { - $errmsg = "extension redis is not installed"; - log::add($errmsg, "Error"); - return null; - } - // 这里不能用pconnect,会报错:Uncaught exception 'RedisException' with message 'read error on connection' - $_instance->connect($GLOBALS['config']['redis']['host'], $GLOBALS['config']['redis']['port'], $GLOBALS['config']['redis']['timeout']); - - // 验证 - if ($GLOBALS['config']['redis']['pass']) - { - if ( !$_instance->auth($GLOBALS['config']['redis']['pass']) ) - { - $errmsg = "Redis Server authentication failed!!"; - log::add($errmsg, "Error"); - return null; - } - } - - // 不序列化的话不能存数组,用php的序列化方式其他语言又不能读取,所以这里自己用json序列化了,性能还比php的序列化好1.4倍 - //$_instance->setOption(Redis::OPT_SERIALIZER, Redis::SERIALIZER_NONE); // don't serialize data - //$_instance->setOption(Redis::OPT_SERIALIZER, Redis::SERIALIZER_PHP); // use built-in serialize/unserialize - //$_instance->setOption(Redis::OPT_SERIALIZER, Redis::SERIALIZER_IGBINARY); // use igBinary serialize/unserialize - - $_instance->setOption(Redis::OPT_PREFIX, $GLOBALS['config']['redis']['prefix'] . ":"); - - return $_instance; - } -} - - diff --git a/vendor/owner888/phpspider/core/constants.php b/vendor/owner888/phpspider/core/constants.php deleted file mode 100644 index 2d6dd4e..0000000 --- a/vendor/owner888/phpspider/core/constants.php +++ /dev/null @@ -1,55 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider公共入口文件 -//---------------------------------- - -//namespace phpspider\core; - -// Display errors. -ini_set('display_errors', 'on'); -// Reporting all. -error_reporting(E_ALL); - -// 永不超时 -ini_set('max_execution_time', 0); -set_time_limit(0); -// 内存限制,如果外面设置的内存比 /etc/php/php-cli.ini 大,就不要设置了 -if (intval(ini_get("memory_limit")) < 1024) -{ - ini_set('memory_limit', '1024M'); -} - -if( PHP_SAPI != 'cli' ) -{ - exit("You must run the CLI environment\n"); -} - -// Date.timezone -if (!ini_get('date.timezone')) -{ - date_default_timezone_set('Asia/Shanghai'); -} - -//核心库目录 -define('CORE', dirname(__FILE__)); -define('PATH_ROOT', CORE."/../"); -define('PATH_DATA', CORE."/../data"); -define('PATH_LIBRARY', CORE."/../library"); - -//系统配置 -//if( file_exists( PATH_ROOT."/config/inc_config.php" ) ) -//{ - //require PATH_ROOT."/config/inc_config.php"; -//} - - diff --git a/vendor/owner888/phpspider/core/db.php b/vendor/owner888/phpspider/core/db.php deleted file mode 100644 index f891a9d..0000000 --- a/vendor/owner888/phpspider/core/db.php +++ /dev/null @@ -1,579 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider数据库类文件 -//---------------------------------- - -namespace phpspider\core; - -class db -{ - private static $configs = array(); - private static $rsid; - private static $links = array(); - private static $link_name = 'default'; - private static $autocommiting = false; - - public static function _init() - { - // 获取配置 - $config = self::$link_name == 'default' ? self::_get_default_config() : self::$configs[self::$link_name]; - - // 创建连接 - if (empty(self::$links[self::$link_name]) || empty(self::$links[self::$link_name]['conn'])) - { - // 第一次连接,初始化fail和pid - if (empty(self::$links[self::$link_name])) - { - self::$links[self::$link_name]['fail'] = 0; - self::$links[self::$link_name]['pid'] = function_exists('posix_getpid') ? posix_getpid() : 0; - //echo "progress[".self::$links[self::$link_name]['pid']."] create db connect[".self::$link_name."]\n"; - } - self::$links[self::$link_name]['conn'] = mysqli_connect($config['host'], $config['user'], $config['pass'], $config['name'], $config['port']); - if(mysqli_connect_errno()) - { - self::$links[self::$link_name]['fail']++; - $errmsg = 'Mysql Connect failed['.self::$links[self::$link_name]['fail'].']: ' . mysqli_connect_error(); - echo util::colorize(date("H:i:s") . " {$errmsg}\n\n", 'fail'); - log::add($errmsg, "Error"); - // 连接失败5次,中断进程 - if (self::$links[self::$link_name]['fail'] >= 5) - { - exit(250); - } - self::_init($config); - } - else - { - mysqli_query(self::$links[self::$link_name]['conn'], " SET character_set_connection=utf8, character_set_results=utf8, character_set_client=binary, sql_mode='' "); - } - } - else - { - $curr_pid = function_exists('posix_getpid') ? posix_getpid() : 0; - // 如果父进程已经生成资源就释放重新生成,因为多进程不能共享连接资源 - if (self::$links[self::$link_name]['pid'] != $curr_pid) - { - self::clear_link(); - } - } - } - - /** - * 重新设置连接 - * 传空的话就等于关闭数据库再连接 - * 在多进程环境下如果主进程已经调用过了,子进程一定要调用一次 clear_link,否则会报错: - * Error while reading greeting packet. PID=19615,这是两个进程互抢一个连接句柄引起的 - * - * @param array $config - * @return void - * @author seatle - * @created time :2016-03-29 00:51 - */ - public static function clear_link() - { - if(self::$links) - { - foreach(self::$links as $k=>$v) - { - @mysqli_close($v['conn']); - unset(self::$links[$k]); - } - } - // 注意,只会连接最后一个,不过貌似也够用了啊 - self::_init(); - } - - /** - * 改变链接为指定配置的链接(如果不同时使用多个数据库,不会涉及这个操作) - * @parem $link_name 链接标识名 - * @parem $config 多次使用时, 这个数组只需传递一次 - * config 格式与 $GLOBALS['config']['db'] 一致 - * @return void - */ - public static function set_connect($link_name, $config = array()) - { - self::$link_name = $link_name; - if (!empty($config)) - { - self::$configs[self::$link_name] = $config; - } - else - { - if (empty(self::$configs[self::$link_name])) - { - throw new Exception("You not set a config array for connect!"); - } - } - } - - - /** - * 还原为默认连接(如果不同时使用多个数据库,不会涉及这个操作) - * @parem $config 指定配置(默认使用inc_config.php的配置) - * @return void - */ - public static function set_connect_default() - { - $config = self::_get_default_config(); - self::set_connect('default', $config); - } - - - /** - * 获取默认配置 - */ - protected static function _get_default_config() - { - if (empty(self::$configs['default'])) - { - if (!is_array($GLOBALS['config']['db'])) - { - exit('db.php _get_default_config()' . '没有mysql配置'); - } - self::$configs['default'] = $GLOBALS['config']['db']; - } - return self::$configs['default']; - } - - /** - * 返回查询游标 - * @return rsid - */ - protected static function _get_rsid($rsid = '') - { - return $rsid == '' ? self::$rsid : $rsid; - } - - public static function autocommit($mode = false) - { - if ( self::$autocommiting ) - { - return true; - } - - self::$autocommiting = true; - - self::_init(); - return mysqli_autocommit(self::$links[self::$link_name]['conn'], $mode); - } - - public static function begin_tran() - { - return self::autocommit(false); - } - - public static function commit() - { - mysqli_commit(self::$links[self::$link_name]['conn']); - self::autocommit(true); - return true; - } - - - public static function rollback() - { - mysqli_rollback(self::$links[self::$link_name]['conn']); - self::autocommit(true); - return true; - } - - public static function query($sql) - { - $sql = trim($sql); - - // 初始化数据库 - self::_init(); - self::$rsid = @mysqli_query(self::$links[self::$link_name]['conn'], $sql); - - if (self::$rsid === false) - { - // 不要每次都ping,浪费流量浪费性能,执行出错了才重新连接 - $errno = mysqli_errno(self::$links[self::$link_name]['conn']); - if ($errno == 2013 || $errno == 2006) - { - $errmsg = mysqli_error(self::$links[self::$link_name]['conn']); - log::add($errmsg, "Error"); - - @mysqli_close(self::$links[self::$link_name]['conn']); - self::$links[self::$link_name]['conn'] = null; - return self::query($sql); - } - - $errmsg = "Query SQL: ".$sql; - log::add($errmsg, "Warning"); - $errmsg = "Error SQL: ".mysqli_error(self::$links[self::$link_name]['conn']); - log::add($errmsg, "Warning"); - - $backtrace = debug_backtrace(); - array_shift($backtrace); - $narr = array('class', 'type', 'function', 'file', 'line'); - $err = "debug_backtrace:\n"; - foreach($backtrace as $i => $l) - { - foreach($narr as $k) - { - if( !isset($l[$k]) ) - { - $l[$k] = ''; - } - } - $err .= "[$i] in function {$l['class']}{$l['type']}{$l['function']} "; - if($l['file']) $err .= " in {$l['file']} "; - if($l['line']) $err .= " on line {$l['line']} "; - $err .= "\n"; - } - log::add($err); - - return false; - } - else - { - return self::$rsid; - } - } - - public static function fetch($rsid = '') - { - $rsid = self::_get_rsid($rsid); - $row = mysqli_fetch_array($rsid, MYSQLI_ASSOC); - return $row; - } - - public static function get_one($sql) - { - if (!preg_match("/limit/i", $sql)) - { - $sql = preg_replace("/[,;]$/i", '', trim($sql)) . " limit 1 "; - } - $rsid = self::query($sql); - if ($rsid === false) - { - return array(); - } - $row = self::fetch($rsid); - self::free($rsid); - return $row; - } - - public static function get_all($sql) - { - $rsid = self::query($sql); - if ($rsid === false) - { - return array(); - } - while ( $row = self::fetch($rsid) ) - { - $rows[] = $row; - } - self::free($rsid); - return empty($rows) ? false : $rows; - } - - public static function free($rsid) - { - return mysqli_free_result($rsid); - } - - public static function insert_id() - { - return mysqli_insert_id(self::$links[self::$link_name]['conn']); - } - - public static function affected_rows() - { - return mysqli_affected_rows(self::$links[self::$link_name]['conn']); - } - - public static function insert($table = '', $data = null, $return_sql = false) - { - $items_sql = $values_sql = ""; - foreach ($data as $k => $v) - { - $v = stripslashes($v); - $v = addslashes($v); - $items_sql .= "`$k`,"; - $values_sql .= "\"$v\","; - } - $sql = "Insert Ignore Into `{$table}` (" . substr($items_sql, 0, -1) . ") Values (" . substr($values_sql, 0, -1) . ")"; - if ($return_sql) - { - return $sql; - } - else - { - if (self::query($sql)) - { - return mysqli_insert_id(self::$links[self::$link_name]['conn']); - } - else - { - return false; - } - } - } - - public static function insert_batch($table = '', $set = NULL, $return_sql = FALSE) - { - if (empty($table) || empty($set)) - { - return false; - } - $set = self::strsafe($set); - $fields = self::get_fields($table); - - $keys_sql = $vals_sql = array(); - foreach ($set as $i=>$val) - { - ksort($val); - $vals = array(); - foreach ($val as $k => $v) - { - // 过滤掉数据库没有的字段 - if (!in_array($k, $fields)) - { - continue; - } - // 如果是第一个数组,把key当做插入条件 - if ($i == 0 && $k == 0) - { - $keys_sql[] = "`$k`"; - } - $vals[] = "\"$v\""; - } - $vals_sql[] = implode(",", $vals); - } - - $sql = "Insert Ignore Into `{$table}`(".implode(", ", $keys_sql).") Values (".implode("), (", $vals_sql).")"; - - if ($return_sql) return $sql; - - $rt = self::query($sql); - $insert_id = self::insert_id(); - $return = empty($insert_id) ? $rt : $insert_id; - return $return; - } - - public static function update_batch($table = '', $set = NULL, $index = NULL, $where = NULL, $return_sql = FALSE) - { - if (empty($table) || is_null($set) || is_null($index)) - { - // 不要用exit,会中断程序 - return false; - } - $set = self::strsafe($set); - $fields = self::get_fields($table); - - $ids = array(); - foreach ($set as $val) - { - ksort($val); - // 去重,其实不去也可以,因为相同的when只会执行第一个,后面的就直接跳过不执行了 - $key = md5($val[$index]); - $ids[$key] = $val[$index]; - - foreach (array_keys($val) as $field) - { - if ($field != $index) - { - $final[$field][$key] = 'When `'.$index.'` = "'.$val[$index].'" Then "'.$val[$field].'"'; - } - } - } - //$ids = array_values($ids); - - // 如果不是数组而且不为空,就转数组 - if (!is_array($where) && !empty($where)) - { - $where = array($where); - } - $where[] = $index.' In ("'.implode('","', $ids).'")'; - $where = empty($where) ? "" : " Where ".implode(" And ", $where); - - $sql = "Update `".$table."` Set "; - $cases = ''; - - foreach ($final as $k => $v) - { - // 过滤掉数据库没有的字段 - if (!in_array($k, $fields)) - { - continue; - } - $cases .= '`'.$k.'` = Case '."\n"; - foreach ($v as $row) - { - $cases .= $row."\n"; - } - - $cases .= 'Else `'.$k.'` End, '; - } - - $sql .= substr($cases, 0, -2); - - // 其实不带 Where In ($index) 的条件也可以的 - $sql .= $where; - - if ($return_sql) return $sql; - - $rt = self::query($sql); - $insert_id = self::affected_rows(); - $return = empty($affected_rows) ? $rt : $affected_rows; - return $return; - } - - public static function update($table = '', $data = array(), $where = null, $return_sql = false) - { - $sql = "UPDATE `{$table}` SET "; - foreach ($data as $k => $v) - { - $v = stripslashes($v); - $v = addslashes($v); - $sql .= "`{$k}` = \"{$v}\","; - } - if (!is_array($where)) - { - $where = array($where); - } - // 删除空字段,不然array("")会成为WHERE - foreach ($where as $k => $v) - { - if (empty($v)) - { - unset($where[$k]); - } - } - $where = empty($where) ? "" : " Where " . implode(" And ", $where); - $sql = substr($sql, 0, -1) . $where; - if ($return_sql) - { - return $sql; - } - else - { - if (self::query($sql)) - { - return mysqli_affected_rows(self::$links[self::$link_name]['conn']); - } - else - { - return false; - } - } - } - - public static function delete($table = '', $where = null, $return_sql = false) - { - // 小心全部被删除了 - if (empty($where)) - { - return false; - } - $where = 'Where ' . (!is_array($where) ? $where : implode(' And ', $where)); - $sql = "Delete From `{$table}` {$where}"; - if ($return_sql) - { - return $sql; - } - else - { - if (self::query($sql)) - { - return mysqli_affected_rows(self::$links[self::$link_name]['conn']); - } - else - { - return false; - } - } - } - - public static function ping() - { - if (!mysqli_ping(self::$links[self::$link_name]['conn'])) - { - @mysqli_close(self::$links[self::$link_name]['conn']); - self::$links[self::$link_name]['conn'] = null; - self::_init(); - } - } - - public static function strsafe($array) - { - $arrays = array(); - if(is_array($array)===true) - { - foreach ($array as $key => $val) - { - if(is_array($val)===true) - { - $arrays[$key] = self::strsafe($val); - } - else - { - //先去掉转义,避免下面重复转义了 - $val = stripslashes($val); - //进行转义 - $val = addslashes($val); - //处理addslashes没法处理的 _ % 字符 - //$val = strtr($val, array('_'=>'\_', '%'=>'\%')); - $arrays[$key] = $val; - } - } - return $arrays; - } - else - { - $array = stripslashes($array); - $array = addslashes($array); - //$array = strtr($array, array('_'=>'\_', '%'=>'\%')); - return $array; - } - } - - // 这个是给insert、update、insert_batch、update_batch用的 - public static function get_fields($table) - { - // $sql = "SHOW COLUMNS FROM $table"; //和下面的语句效果一样 - $rows = self::get_all("Desc `{$table}`"); - $fields = array(); - foreach ($rows as $k => $v) - { - // 过滤自增主键 - // if ($v['Key'] != 'PRI') - if ($v['Extra'] != 'auto_increment') - { - $fields[] = $v['Field']; - } - } - return $fields; - } - - public static function table_exists($table_name) - { - $sql = "SHOW TABLES LIKE '" . $table_name . "'"; - $rsid = self::query($sql); - $table = self::fetch($rsid); - if (empty($table)) - { - return false; - } - return true; - } -} - - - - - - diff --git a/vendor/owner888/phpspider/core/init.php b/vendor/owner888/phpspider/core/init.php deleted file mode 100644 index 7bbb2aa..0000000 --- a/vendor/owner888/phpspider/core/init.php +++ /dev/null @@ -1,101 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider公共入口文件 -//---------------------------------- - -// 严格开发模式 -error_reporting( E_ALL ); -//ini_set('display_errors', 1); - -// 永不超时 -ini_set('max_execution_time', 0); -set_time_limit(0); -// 内存限制,如果外面设置的内存比 /etc/php/php-cli.ini 大,就不要设置了 -if (intval(ini_get("memory_limit")) < 1024) -{ - ini_set('memory_limit', '1024M'); -} - -if( PHP_SAPI != 'cli' ) -{ - exit("You must run the CLI environment\n"); -} - -// 设置时区 -date_default_timezone_set('Asia/Shanghai'); - -// 引入PATH_DATA -require_once __DIR__ . '/constants.php'; -// 核心库目录 -define('CORE', dirname(__FILE__)); -define('PATH_ROOT', CORE."/../"); -define('PATH_DATA', CORE."/../data"); -define('PATH_LIBRARY', CORE."/../library"); - -// 系统配置 -if( file_exists( PATH_ROOT."/config/inc_config.php" ) ) -{ - require PATH_ROOT."/config/inc_config.php"; -} -require CORE.'/log.php'; -require CORE.'/requests.php'; -require CORE.'/selector.php'; -require CORE.'/util.php'; -require CORE.'/db.php'; -require CORE.'/cache.php'; -require CORE."/worker.php"; -require CORE."/phpspider.php"; - -// 启动的时候生成data目录 -util::path_exists(PATH_DATA); -util::path_exists(PATH_DATA."/lock"); -util::path_exists(PATH_DATA."/log"); -util::path_exists(PATH_DATA."/cache"); -util::path_exists(PATH_DATA."/status"); - -function autoload($classname) { - set_include_path(PATH_ROOT.'/library/'); - spl_autoload($classname); //replaces include/require -} - -spl_autoload_extensions('.php'); -spl_autoload_register('autoload'); - -/** - * 自动加载类库处理 - * @return void - */ -//function __autoload( $classname ) -//{ - //$classname = preg_replace("/[^0-9a-z_]/i", '', $classname); - //if( class_exists ( $classname ) ) { - //return true; - //} - //$classfile = $classname.'.php'; - //try - //{ - //if ( file_exists ( PATH_LIBRARY.'/'.$classfile ) ) - //{ - //require PATH_LIBRARY.'/'.$classfile; - //} - //else - //{ - //throw new Exception ( 'Error: Cannot find the '.$classname ); - //} - //} - //catch ( Exception $e ) - //{ - //log::error($e->getMessage().'|'.$classname); - //exit(); - //} -//} diff --git a/vendor/owner888/phpspider/core/log.php b/vendor/owner888/phpspider/core/log.php deleted file mode 100644 index b4c4cf0..0000000 --- a/vendor/owner888/phpspider/core/log.php +++ /dev/null @@ -1,119 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider日志类文件 -//---------------------------------- - -namespace phpspider\core; -// 引入PATH_DATA -require_once __DIR__ . '/constants.php'; - -class log -{ - public static $log_show = false; - public static $log_type = false; - public static $log_file = "data/phpspider.log"; - public static $out_sta = ""; - public static $out_end = ""; - - public static function note($msg) - { - self::$out_sta = self::$out_end = ""; - self::msg($msg, 'note'); - } - - public static function info($msg) - { - self::$out_sta = self::$out_end = ""; - self::msg($msg, 'info'); - } - - public static function warn($msg) - { - self::$out_sta = self::$out_end = ""; - if (!util::is_win()) - { - self::$out_sta = "\033[33m"; - self::$out_end = "\033[0m"; - } - - self::msg($msg, 'warn'); - } - - public static function debug($msg) - { - self::$out_sta = self::$out_end = ""; - if (!util::is_win()) - { - self::$out_sta = "\033[36m"; - self::$out_end = "\033[0m"; - } - - self::msg($msg, 'debug'); - } - - public static function error($msg) - { - self::$out_sta = self::$out_end = ""; - if (!util::is_win()) - { - self::$out_sta = "\033[31m"; - self::$out_end = "\033[0m"; - } - - self::msg($msg, 'error'); - } - - public static function msg($msg, $log_type) - { - if ($log_type != 'note' && self::$log_type && strpos(self::$log_type, $log_type) === false) - { - return false; - } - - if ($log_type == 'note') - { - $msg = self::$out_sta. $msg . "\n".self::$out_end; - } - else - { - $msg = self::$out_sta.date("Y-m-d H:i:s")." [{$log_type}] " . $msg .self::$out_end. "\n"; - } - if(self::$log_show) - { - echo $msg; - } - file_put_contents(self::$log_file, $msg, FILE_APPEND | LOCK_EX); - } - - /** - * 记录日志 XXX - * @param string $msg - * @param string $log_type Note|Warning|Error - * @return void - */ - public static function add($msg, $log_type = '') - { - if ($log_type != '') - { - $msg = date("Y-m-d H:i:s")." [{$log_type}] " . $msg . "\n"; - } - if(self::$log_show) - { - echo $msg; - } - //file_put_contents(PATH_DATA."/log/".strtolower($log_type).".log", $msg, FILE_APPEND | LOCK_EX); - file_put_contents(PATH_DATA."/log/error.log", $msg, FILE_APPEND | LOCK_EX); - } - -} - diff --git a/vendor/owner888/phpspider/core/phpspider.bak20170807.php b/vendor/owner888/phpspider/core/phpspider.bak20170807.php deleted file mode 100644 index e747237..0000000 --- a/vendor/owner888/phpspider/core/phpspider.bak20170807.php +++ /dev/null @@ -1,2870 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider核心类文件 -//---------------------------------- - -namespace phpspider\core; - -require_once __DIR__ . '/constants.php'; - -use phpspider\core\requests; -use phpspider\core\selector; -use phpspider\core\queue; -use phpspider\core\db; -use phpspider\core\util; -use phpspider\core\log; -use Exception; - -//require CORE.'/log.php'; -//require CORE.'/requests.php'; -//require CORE.'/selector.php'; -//require CORE.'/util.php'; -//require CORE.'/db.php'; -//require CORE.'/cache.php'; -//require CORE."/worker.php"; -//require CORE."/phpspider.php"; - -// 启动的时候生成data目录 -util::path_exists(PATH_DATA); -util::path_exists(PATH_DATA."/lock"); -util::path_exists(PATH_DATA."/log"); -util::path_exists(PATH_DATA."/cache"); -util::path_exists(PATH_DATA."/status"); - -class phpspider -{ - /** - * 版本号 - * @var string - */ - const VERSION = '3.0.4'; - - /** - * 爬虫爬取每个网页的时间间隔,0表示不延时, 单位: 毫秒 - */ - const INTERVAL = 0; - - /** - * 爬虫爬取每个网页的超时时间, 单位: 秒 - */ - const TIMEOUT = 5; - - /** - * 爬取失败次数, 不想失败重新爬取则设置为0 - */ - const MAX_TRY = 0; - - /** - * 爬虫爬取网页所使用的浏览器类型: pc、ios、android - * 默认类型是PC - */ - const AGENT_PC = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36"; - const AGENT_IOS = "Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_3 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G34 Safari/601.1"; - const AGENT_ANDROID = "Mozilla/5.0 (Linux; U; Android 6.0.1;zh_cn; Le X820 Build/FEXCNFN5801507014S) AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 Chrome/49.0.0.0 Mobile Safari/537.36 EUI Browser/5.8.015S"; - - /** - * pid文件的路径及名称 - * @var string - */ - //public static $pid_file = ''; - - /** - * 日志目录, 默认在data根目录下 - * @var mixed - */ - //public static $log_file = ''; - - /** - * 主任务进程ID - */ - //public static $master_pid = 0; - - /** - * 所有任务进程ID - */ - //public static $taskpids = array(); - - /** - * Daemonize. - * - * @var bool - */ - public static $daemonize = false; - - /** - * 当前进程是否终止 - */ - public static $terminate = false; - - /** - * 是否分布式 - */ - public static $multiserver = false; - - /** - * 当前服务器ID - */ - public static $serverid = 1; - - /** - * 主任务进程 - */ - public static $taskmaster = true; - - /** - * 当前任务ID - */ - public static $taskid = 1; - - /** - * 当前任务进程ID - */ - public static $taskpid = 1; - - /** - * 并发任务数 - */ - public static $tasknum = 1; - - /** - * 生成 - */ - public static $fork_task_complete = false; - - /** - * 是否使用Redis - */ - public static $use_redis = false; - - /** - * 是否保存爬虫运行状态 - */ - public static $save_running_state = false; - - /** - * 配置 - */ - public static $configs = array(); - - /** - * 要抓取的URL队列 - md5(url) => array( - 'url' => '', // 要爬取的URL - 'url_type' => '', // 要爬取的URL类型,scan_page、list_page、content_page - 'method' => 'get', // 默认为"GET"请求, 也支持"POST"请求 - 'headers' => array(), // 此url的Headers, 可以为空 - 'params' => array(), // 发送请求时需添加的参数, 可以为空 - 'context_data'=> '', // 此url附加的数据, 可以为空 - 'proxies' => false, // 是否使用代理 - 'try_num' => 0 // 抓取次数 - 'max_try' => 0 // 允许抓取失败次数 - ) - */ - public static $collect_queue = array(); - - /** - * 要抓取的URL数组 - * md5($url) => time() - */ - public static $collect_urls = array(); - - /** - * 要抓取的URL数量 - */ - public static $collect_urls_num = 0; - - /** - * 已经抓取的URL数量 - */ - public static $collected_urls_num = 0; - - /** - * 当前进程采集成功数 - */ - public static $collect_succ = 0; - - /** - * 当前进程采集失败数 - */ - public static $collect_fail = 0; - - /** - * 提取到的字段数 - */ - public static $fields_num = 0; - - /** - * 采集深度 - */ - public static $depth_num = 0; - - /** - * 爬虫开始时间 - */ - public static $time_start = 0; - - /** - * 任务状态 - */ - public static $task_status = array(); - - // 导出类型配置 - public static $export_type = ''; - public static $export_file = ''; - public static $export_conf = ''; - public static $export_table = ''; - - // 数据库配置 - public static $db_config = array(); - // 队列配置 - public static $queue_config = array(); - - // 运行面板参数长度 - public static $server_length = 10; - public static $tasknum_length = 8; - public static $taskid_length = 8; - public static $pid_length = 8; - public static $mem_length = 8; - public static $urls_length = 15; - public static $speed_length = 6; - - /** - * 爬虫初始化时调用, 用来指定一些爬取前的操作 - * - * @var mixed - * @access public - */ - public $on_start = null; - - /** - * 网页状态码回调 - * - * @var mixed - * @access public - */ - public $on_status_code = null; - - /** - * 判断当前网页是否被反爬虫, 需要开发者实现 - * - * @var mixed - * @access public - */ - public $is_anti_spider = null; - - /** - * 在一个网页下载完成之后调用, 主要用来对下载的网页进行处理 - * - * @var mixed - * @access public - */ - public $on_download_page = null; - - /** - * 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理 - * - * @var mixed - * @access public - */ - public $on_download_attached_page = null; - - /** - * 当前页面抽取到URL - * - * @var mixed - * @access public - */ - public $on_fetch_url = null; - - /** - * URL属于入口页 - * 在爬取到入口url的内容之后, 添加新的url到待爬队列之前调用 - * 主要用来发现新的待爬url, 并且能给新发现的url附加数据 - * - * @var mixed - * @access public - */ - public $on_scan_page = null; - - /** - * URL属于列表页 - * 在爬取到列表页url的内容之后, 添加新的url到待爬队列之前调用 - * 主要用来发现新的待爬url, 并且能给新发现的url附加数据 - * - * @var mixed - * @access public - */ - public $on_list_page = null; - - /** - * URL属于内容页 - * 在爬取到内容页url的内容之后, 添加新的url到待爬队列之前调用 - * 主要用来发现新的待爬url, 并且能给新发现的url附加数据 - * - * @var mixed - * @access public - */ - public $on_content_page = null; - - /** - * 在抽取到field内容之后调用, 对其中包含的img标签进行回调处理 - * - * @var mixed - * @access public - */ - public $on_handle_img = null; - - /** - * 当一个field的内容被抽取到后进行的回调, 在此回调中可以对网页中抽取的内容作进一步处理 - * - * @var mixed - * @access public - */ - public $on_extract_field = null; - - /** - * 在一个网页的所有field抽取完成之后, 可能需要对field进一步处理, 以发布到自己的网站 - * - * @var mixed - * @access public - */ - public $on_extract_page = null; - - /** - * 如果抓取的页面是一个附件文件, 比如图片、视频、二进制文件、apk、ipad、exe - * 就不去分析他的内容提取field了, 提取field只针对HTML - * - * @var mixed - * @access public - */ - public $on_attachment_file = null; - - function __construct($configs = array()) - { - // 产生时钟云,解决php7下面ctrl+c无法停止bug - declare(ticks = 1); - - // 先打开以显示验证报错内容 - log::$log_show = true; - log::$log_file = isset($configs['log_file']) ? $configs['log_file'] : PATH_DATA.'/phpspider.log'; - log::$log_type = isset($configs['log_type']) ? $configs['log_type'] : false; - - // 彩蛋 - $included_files = get_included_files(); - $content = file_get_contents($included_files[0]); - if (!preg_match("#/\* Do NOT delete this comment \*/#", $content) || !preg_match("#/\* 不要删除这段注释 \*/#", $content)) - { - $msg = "Unknown error..."; - log::error($msg); - exit; - } - - $configs['name'] = isset($configs['name']) ? $configs['name'] : 'phpspider'; - $configs['proxies'] = isset($configs['proxies']) ? $configs['proxies'] : ''; - $configs['user_agent'] = isset($configs['user_agent']) ? $configs['user_agent'] : self::AGENT_PC; - $configs['user_agents'] = isset($configs['user_agents']) ? $configs['user_agents'] : null; - $configs['client_ip'] = isset($configs['client_ip']) ? $configs['client_ip'] : null; - $configs['client_ips'] = isset($configs['client_ips']) ? $configs['client_ips'] : null; - $configs['interval'] = isset($configs['interval']) ? $configs['interval'] : self::INTERVAL; - $configs['timeout'] = isset($configs['timeout']) ? $configs['timeout'] : self::TIMEOUT; - $configs['max_try'] = isset($configs['max_try']) ? $configs['max_try'] : self::MAX_TRY; - $configs['max_depth'] = isset($configs['max_depth']) ? $configs['max_depth'] : 0; - $configs['max_fields'] = isset($configs['max_fields']) ? $configs['max_fields'] : 0; - $configs['export'] = isset($configs['export']) ? $configs['export'] : array(); - - // csv、sql、db - self::$export_type = isset($configs['export']['type']) ? $configs['export']['type'] : ''; - self::$export_file = isset($configs['export']['file']) ? $configs['export']['file'] : ''; - self::$export_table = isset($configs['export']['table']) ? $configs['export']['table'] : ''; - self::$db_config = isset($configs['db_config']) ? $configs['db_config'] : array(); - self::$queue_config = isset($configs['queue_config']) ? $configs['queue_config'] : array(); - - // 是否设置了并发任务数, 并且大于1, 而且不是windows环境 - if (isset($configs['tasknum']) && $configs['tasknum'] > 1 && !util::is_win()) - { - self::$tasknum = $configs['tasknum']; - } - - // 是否设置了保留运行状态 - if (isset($configs['save_running_state'])) - { - self::$save_running_state = $configs['save_running_state']; - } - - // 是否分布式 - if (isset($configs['multiserver'])) - { - self::$multiserver = $configs['multiserver']; - } - - // 当前服务器ID - if (isset($configs['serverid'])) - { - self::$serverid = $configs['serverid']; - } - - // 不同项目的采集以采集名称作为前缀区分 - if (isset($GLOBALS['config']['redis']['prefix'])) - { - $GLOBALS['config']['redis']['prefix'] = $GLOBALS['config']['redis']['prefix'].'-'.md5($configs['name']); - } - - self::$configs = $configs; - } - - public function add_scan_url($url, $options = array(), $allowed_repeat = true) - { - // 投递状态 - $status = false; - - $link = $options; - $link['url'] = $url; - $link['url_type'] = 'scan_page'; - $link = $this->link_uncompress($link); - - if ($this->is_list_page($url)) - { - $link['url_type'] = 'list_page'; - $status = $this->queue_lpush($link, $allowed_repeat); - } - elseif ($this->is_content_page($url)) - { - $link['url_type'] = 'content_page'; - $status = $this->queue_lpush($link, $allowed_repeat); - } - else - { - $status = $this->queue_lpush($link, $allowed_repeat); - } - - if ($status) - { - if ($link['url_type'] == 'scan_page') - { - log::debug("Find scan page: {$url}"); - } - elseif ($link['url_type'] == 'list_page') - { - log::debug("Find list page: {$url}"); - } - elseif ($link['url_type'] == 'content_page') - { - log::debug("Find content page: {$url}"); - } - } - - return $status; - } - - /** - * 一般在 on_scan_page 和 on_list_page 回调函数中调用, 用来往待爬队列中添加url - * 两个进程同时调用这个方法, 传递相同url的时候, 就会出现url重复进入队列 - * - * @param mixed $url - * @param mixed $options - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function add_url($url, $options = array(), $depth = 0) - { - // 投递状态 - $status = false; - - $link = $options; - $link['url'] = $url; - $link['depth'] = $depth; - $link = $this->link_uncompress($link); - - if ($this->is_list_page($url)) - { - $link['url_type'] = 'list_page'; - $status = $this->queue_lpush($link); - } - - if ($this->is_content_page($url)) - { - $link['url_type'] = 'content_page'; - $status = $this->queue_lpush($link); - } - - if ($status) - { - if ($link['url_type'] == 'scan_page') - { - log::debug("Find scan page: {$url}"); - } - elseif ($link['url_type'] == 'list_page') - { - log::debug("Find list page: {$url}"); - } - elseif ($link['url_type'] == 'content_page') - { - log::debug("Find content page: {$url}"); - } - } - - return $status; - } - - /** - * 是否入口页面 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-10-12 19:06 - */ - public function is_scan_page($url) - { - $parse_url = parse_url($url); - if (empty($parse_url['host']) || !in_array($parse_url['host'], self::$configs['domains'])) - { - return false; - } - return true; - } - - /** - * 是否列表页面 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-10-12 19:06 - */ - public function is_list_page($url) - { - $result = false; - if (!empty(self::$configs['list_url_regexes'])) - { - foreach (self::$configs['list_url_regexes'] as $regex) - { - if (preg_match("#{$regex}#i", $url)) - { - $result = true; - break; - } - } - } - return $result; - } - - /** - * 是否内容页面 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-10-12 19:06 - */ - public function is_content_page($url) - { - $result = false; - if (!empty(self::$configs['content_url_regexes'])) - { - foreach (self::$configs['content_url_regexes'] as $regex) - { - if (preg_match("#{$regex}#i", $url)) - { - $result = true; - break; - } - } - } - return $result; - } - - /** - * Parse command. - * php yourfile.php start | stop | status | kill - * - * @return void - */ - public function parse_command() - { - // 检查运行命令的参数 - global $argv; - $start_file = $argv[0]; - - // 命令 - $command = isset($argv[1]) ? trim($argv[1]) : 'start'; - - // 子命令, 目前只支持-d - $command2 = isset($argv[2]) ? $argv[2] : ''; - - // 根据命令做相应处理 - switch($command) - { - // 启动 phpspider - case 'start': - if ($command2 === '-d') - { - self::$daemonize = true; - } - break; - case 'stop': - exec("ps aux | grep $start_file | grep -v grep | awk '{print $2}'", $info); - if (count($info) <= 1) - { - echo "PHPSpider[$start_file] not run\n"; - } - else - { - //echo "PHPSpider[$start_file] is stoping ...\n"; - echo "PHPSpider[$start_file] stop success"; - exec("ps aux | grep $start_file | grep -v grep | awk '{print $2}' |xargs kill -SIGINT", $info); - } - exit; - break; - case 'kill': - exec("ps aux | grep $start_file | grep -v grep | awk '{print $2}' |xargs kill -SIGKILL"); - break; - // 显示 phpspider 运行状态 - case 'status': - exit(0); - // 未知命令 - default : - exit("Usage: php yourfile.php {start|stop|status|kill}\n"); - } - } - - /** - * Signal hander. - * - * @param int $signal - */ - public function signal_handler($signal) - { - switch ($signal) { - // Stop. - case SIGINT: - log::warn("Program stopping..."); - self::$terminate = true; - break; - // Show status. - case SIGUSR2: - echo "show status\n"; - break; - } - } - - /** - * Install signal handler. - * - * @return void - */ - public function install_signal() - { - if (function_exists('pcntl_signal')) - { - // stop - pcntl_signal(SIGINT, array(__CLASS__, 'signal_handler'), false); - // status - pcntl_signal(SIGUSR2, array(__CLASS__, 'signal_handler'), false); - // ignore - pcntl_signal(SIGPIPE, SIG_IGN, false); - } - } - - /** - * Run as deamon mode. - * - * @throws Exception - */ - protected static function daemonize() - { - if (!self::$daemonize) - { - return; - } - - // fork前一定要关闭redis - queue::clear_link(); - - umask(0); - $pid = pcntl_fork(); - if (-1 === $pid) - { - throw new Exception('fork fail'); - } - elseif ($pid > 0) - { - exit(0); - } - if (-1 === posix_setsid()) - { - throw new Exception("setsid fail"); - } - // Fork again avoid SVR4 system regain the control of terminal. - $pid = pcntl_fork(); - if (-1 === $pid) - { - throw new Exception("fork fail"); - } - elseif (0 !== $pid) - { - exit(0); - } - } - - /** - * 检查是否终止当前进程 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function check_terminate() - { - if (!self::$terminate) - { - return false; - } - - // 删除当前任务状态 - $this->del_task_status(self::$serverid, self::$taskid); - - if (self::$taskmaster) - { - // 检查子进程是否都退出 - while (true) - { - $all_stop = true; - for ($i = 2; $i <= self::$tasknum; $i++) - { - // 只要一个还活着就说明没有完全退出 - $task_status = $this->get_task_status(self::$serverid, $i); - if ($task_status) - { - $all_stop = false; - } - } - if ($all_stop) - { - break; - } - else - { - log::warn("Task stop waiting..."); - } - sleep(1); - } - - $this->del_server_list(self::$serverid); - - // 显示最后结果 - log::$log_show = true; - - $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start)); - log::note("Spider finished in {$spider_time_run}"); - - $get_collected_url_num = $this->get_collected_url_num(); - log::note("Total pages: {$get_collected_url_num} \n"); - } - exit(); - } - - public function start() - { - $this->parse_command(); - - // 爬虫开始时间 - self::$time_start = time(); - // 当前任务ID - self::$taskid = 1; - // 当前任务进程ID - self::$taskpid = function_exists('posix_getpid') ? posix_getpid() : 1; - self::$collect_succ = 0; - self::$collect_fail = 0; - - //-------------------------------------------------------------------------------- - // 运行前验证 - //-------------------------------------------------------------------------------- - - // 检查PHP版本 - if (version_compare(PHP_VERSION, '5.3.0', 'lt')) - { - log::error('PHP 5.3+ is required, currently installed version is: ' . phpversion()); - exit; - } - - // 检查CURL扩展 - if(!function_exists('curl_init')) - { - log::error("The curl extension was not found"); - exit; - } - - // 多任务需要pcntl扩展支持 - if (self::$tasknum > 1 && !function_exists('pcntl_fork')) - { - log::error("Multitasking needs pcntl, the pcntl extension was not found"); - exit; - } - - // 守护进程需要pcntl扩展支持 - if (self::$daemonize && !function_exists('pcntl_fork')) - { - log::error("Daemonize needs pcntl, the pcntl extension was not found"); - exit; - } - - // 集群、保存运行状态、多任务都需要Redis支持 - if (self::$multiserver || self::$save_running_state || self::$tasknum > 1) - { - self::$use_redis = true; - - queue::set_connect('default', self::$queue_config); - if (!queue::init()) - { - if (self::$multiserver) - { - log::error("Multiserver needs Redis support, ".queue::$error); - exit; - } - - if (self::$tasknum > 1) - { - log::error("Multitasking needs Redis support, ".queue::$error); - exit; - } - - if (self::$save_running_state) - { - log::error("Spider kept running state needs Redis support, ".queue::$error); - exit; - } - } - } - - // 检查导出 - $this->check_export(); - - // 检查缓存 - $this->check_cache(); - - // 检查 scan_urls - if (empty(self::$configs['scan_urls'])) - { - log::error("No scan url to start"); - exit; - } - - foreach ( self::$configs['scan_urls'] as $url ) - { - // 只检查配置中的入口URL, 通过 add_scan_url 添加的不检查了. - if (!$this->is_scan_page($url)) - { - log::error("Domain of scan_urls (\"{$url}\") does not match the domains of the domain name"); - exit; - } - } - - // windows 下没法显示面板, 强制显示日志 - if (util::is_win()) - { - self::$configs['name'] = iconv("UTF-8", "GB2312//IGNORE", self::$configs['name']); - log::$log_show = true; - } - else - { - log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false; - } - - if (self::$daemonize) - { - log::$log_show = true; - } - - if (log::$log_show) - { - global $argv; - $start_file = $argv[0]; - - $header = ""; - if (!util::is_win()) $header .= "\033[33m"; - $header .= "\n[ ".self::$configs['name']." Spider ] is started...\n\n"; - $header .= " * PHPSpider Version: ".self::VERSION."\n"; - $header .= " * Documentation: https://doc.phpspider.org\n"; - $header .= " * Task Number: ".self::$tasknum."\n\n"; - $header .= "Input \"php $start_file stop\" to quit. Start success.\n"; - if (!util::is_win()) $header .= "\033[0m"; - log::note($header); - } - - // 如果是守护进程,恢复日志状态 - if (self::$daemonize) - { - log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false; - } - - // 多任务和分布式都要清掉, 当然分布式只清自己的 - $this->init_redis(); - - //-------------------------------------------------------------------------------- - // 生成多任务 - //-------------------------------------------------------------------------------- - - // 添加入口URL到队列 - foreach ( self::$configs['scan_urls'] as $url ) - { - // false 表示不允许重复 - $this->add_scan_url($url, null, false); - } - - // 放这个位置, 可以添加入口页面 - if ($this->on_start) - { - call_user_func($this->on_start, $this); - } - - if (!self::$daemonize) - { - if (!log::$log_show) - { - // 第一次先清屏 - $this->clear_echo(); - - // 先显示一次面板, 然后下面再每次采集成功显示一次 - $this->display_ui(); - } - } - else - { - $this->daemonize(); - } - - // 安装信号 - $this->install_signal(); - - // 开始采集 - $this->do_collect_page(); - - // 从服务器列表中删除当前服务器信息 - $this->del_server_list(self::$serverid); - } - - /** - * 创建一个子进程 - * @param Worker $worker - * @throws Exception - */ - public function fork_one_task($taskid) - { - $pid = pcntl_fork(); - - // 主进程记录子进程pid - if($pid > 0) - { - // 暂时没用 - //self::$taskpids[$taskid] = $pid; - } - // 子进程运行 - elseif(0 === $pid) - { - log::warn("Fork children task({$taskid}) successful..."); - - // 初始化子进程参数 - self::$time_start = microtime(true); - self::$taskid = $taskid; - self::$taskmaster = false; - self::$taskpid = posix_getpid(); - self::$collect_succ = 0; - self::$collect_fail = 0; - - $this->do_collect_page(); - - // 这里用0表示正常退出 - exit(0); - } - else - { - log::error("Fork children task({$taskid}) fail..."); - exit; - } - } - - public function do_collect_page() - { - queue::set_connect('default', self::$queue_config); - queue::init(); - while( $queue_lsize = $this->queue_lsize() ) - { - // 如果是主任务 - if (self::$taskmaster) - { - // 多任务下主任务未准备就绪 - if (self::$tasknum > 1 && !self::$fork_task_complete) - { - // 主进程采集到两倍于任务数时, 生成子任务一起采集 - if ( $queue_lsize > self::$tasknum*2 ) - { - self::$fork_task_complete = true; - - // fork 子进程前一定要先干掉redis连接fd, 不然会存在进程互抢redis fd 问题 - queue::clear_link(); - // task进程从2开始, 1被master进程所使用 - for ($i = 2; $i <= self::$tasknum; $i++) - { - $this->fork_one_task($i); - } - } - } - - // 抓取页面 - $this->collect_page(); - // 保存任务状态 - $this->set_task_status(); - - // 每采集成功一次页面, 就刷新一次面板 - if (!log::$log_show && !self::$daemonize) - { - $this->display_ui(); - } - } - // 如果是子任务 - else - { - // 如果队列中的网页比任务数2倍多, 子任务可以采集, 否则等待... - if ( $queue_lsize > self::$tasknum*2 ) - { - // 抓取页面 - $this->collect_page(); - // 保存任务状态 - $this->set_task_status(); - } - else - { - log::warn("Task(".self::$taskid.") waiting..."); - sleep(1); - } - } - - // 检查进程是否收到关闭信号 - $this->check_terminate(); - } - } - - /** - * 爬取页面 - * - * @param mixed $collect_url 要抓取的链接 - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function collect_page() - { - $get_collect_url_num = $this->get_collect_url_num(); - log::info("Find pages: {$get_collect_url_num} "); - - $queue_lsize = $this->queue_lsize(); - log::info("Waiting for collect pages: {$queue_lsize} "); - - $get_collected_url_num = $this->get_collected_url_num(); - log::info("Collected pages: {$get_collected_url_num} "); - - // 多任务的时候输出爬虫序号 - if (self::$tasknum > 1) - { - log::info("Current task id: ".self::$taskid); - } - - // 先进先出 - $link = $this->queue_rpop(); - $link = $this->link_uncompress($link); - $url = $link['url']; - - // 标记为已爬取网页 - $this->incr_collected_url_num($url); - - // 爬取页面开始时间 - $page_time_start = microtime(true); - - requests::$input_encoding = null; - $html = $this->request_url($url, $link); - - if (!$html) - { - return false; - } - // 当前正在爬取的网页页面的对象 - $page = array( - 'url' => $url, - 'raw' => $html, - 'request' => array( - 'url' => $url, - 'method' => $link['method'], - 'headers' => $link['headers'], - 'params' => $link['params'], - 'context_data' => $link['context_data'], - 'try_num' => $link['try_num'], - 'max_try' => $link['max_try'], - 'depth' => $link['depth'], - 'taskid' => self::$taskid, - ), - ); - unset($html); - - //-------------------------------------------------------------------------------- - // 处理回调函数 - //-------------------------------------------------------------------------------- - - // 判断当前网页是否被反爬虫了, 需要开发者实现 - if ($this->is_anti_spider) - { - $is_anti_spider = call_user_func($this->is_anti_spider, $url, $page['raw'], $this); - // 如果在回调函数里面判断被反爬虫并且返回true - if ($is_anti_spider) - { - return false; - } - } - - // 在一个网页下载完成之后调用. 主要用来对下载的网页进行处理. - // 比如下载了某个网页, 希望向网页的body中添加html标签 - if ($this->on_download_page) - { - $return = call_user_func($this->on_download_page, $page, $this); - // 针对那些老是忘记return的人 - if (isset($return)) $page = $return; - } - - // 是否从当前页面分析提取URL - // 回调函数如果返回false表示不需要再从此网页中发现待爬url - $is_find_url = true; - if ($link['url_type'] == 'scan_page') - { - if ($this->on_scan_page) - { - $return = call_user_func($this->on_scan_page, $page, $page['raw'], $this); - if (isset($return)) $is_find_url = $return; - } - } - elseif ($link['url_type'] == 'list_page') - { - if ($this->on_list_page) - { - $return = call_user_func($this->on_list_page, $page, $page['raw'], $this); - if (isset($return)) $is_find_url = $return; - } - } - elseif ($link['url_type'] == 'content_page') - { - if ($this->on_content_page) - { - $return = call_user_func($this->on_content_page, $page, $page['raw'], $this); - if (isset($return)) $is_find_url = $return; - } - } - - // on_scan_page、on_list_page、on_content_page 返回false表示不需要再从此网页中发现待爬url - if ($is_find_url) - { - // 如果深度没有超过最大深度, 获取下一级URL - if (self::$configs['max_depth'] == 0 || $link['depth'] < self::$configs['max_depth']) - { - // 分析提取HTML页面中的URL - $this->get_urls($page['raw'], $url, $link['depth'] + 1); - } - } - - // 如果是内容页, 分析提取HTML页面中的字段 - // 列表页也可以提取数据的, source_type: urlcontext, 未实现 - if ($link['url_type'] == 'content_page') - { - $this->get_html_fields($page['raw'], $url, $page); - } - - // 如果当前深度大于缓存的, 更新缓存 - $this->incr_depth_num($link['depth']); - - // 处理页面耗时时间 - $time_run = round(microtime(true) - $page_time_start, 3); - log::debug("Success process page {$url} in {$time_run} s"); - - $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start)); - log::info("Spider running in {$spider_time_run}"); - - // 爬虫爬取每个网页的时间间隔, 单位: 毫秒 - if (!isset(self::$configs['interval'])) - { - // 默认睡眠100毫秒, 太快了会被认为是ddos - self::$configs['interval'] = 100; - } - usleep(self::$configs['interval'] * 1000); - } - - /** - * 下载网页, 得到网页内容 - * - * @param mixed $url - * @param mixed $link - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function request_url($url, $link = array()) - { - $time_start = microtime(true); - - //$url = "http://www.qiushibaike.com/article/117568316"; - - // 设置了编码就不要让requests去判断了 - if (isset(self::$configs['input_encoding'])) - { - requests::$input_encoding = self::$configs['input_encoding']; - } - // 得到的编码如果不是utf-8的要转成utf-8, 因为xpath只支持utf-8 - requests::$output_encoding = 'utf-8'; - requests::set_timeout(self::$configs['timeout']); - requests::set_useragent(self::$configs['user_agent']); - if (self::$configs['user_agents']) - { - requests::set_useragents(self::$configs['user_agents']); - } - if (self::$configs['client_ip']) - { - requests::set_client_ip(self::$configs['client_ip']); - } - if (self::$configs['client_ips']) - { - requests::set_client_ips(self::$configs['client_ips']); - } - - // 是否设置了代理 - if (!empty($link['proxies'])) - { - requests::set_proxies($link['proxies']); - // 自动切换IP - requests::set_header('Proxy-Switch-Ip', 'yes'); - } - - // 如何设置了 HTTP Headers - if (!empty($link['headers'])) - { - foreach ($link['headers'] as $k=>$v) - { - requests::set_header($k, $v); - } - } - - $method = empty($link['method']) ? 'get' : strtolower($link['method']); - $params = empty($link['params']) ? array() : $link['params']; - $html = requests::$method($url, $params); - // 此url附加的数据不为空, 比如内容页需要列表页一些数据, 拼接到后面去 - if ($html && !empty($link['context_data'])) - { - $html .= $link['context_data']; - } - - $http_code = requests::$status_code; - - if ($this->on_status_code) - { - $return = call_user_func($this->on_status_code, $http_code, $url, $html, $this); - if (isset($return)) - { - $html = $return; - } - if (!$html) - { - return false; - } - } - - if ($http_code != 200) - { - // 如果是301、302跳转, 抓取跳转后的网页内容 - if ($http_code == 301 || $http_code == 302) - { - $info = requests::$info; - if (isset($info['redirect_url'])) - { - $url = $info['redirect_url']; - requests::$input_encoding = null; - $html = $this->request_url($url, $link); - if ($html && !empty($link['context_data'])) - { - $html .= $link['context_data']; - } - } - else - { - return false; - } - } - else - { - if ($http_code == 407) - { - // 扔到队列头部去, 继续采集 - $this->queue_rpush($link); - log::error("Failed to download page {$url}"); - self::$collect_fail++; - } - elseif (in_array($http_code, array('0','502','503','429'))) - { - // 采集次数加一 - $link['try_num']++; - // 抓取次数 小于 允许抓取失败次数 - if ( $link['try_num'] <= $link['max_try'] ) - { - // 扔到队列头部去, 继续采集 - $this->queue_rpush($link); - } - log::error("Failed to download page {$url}, retry({$link['try_num']})"); - } - else - { - log::error("Failed to download page {$url}"); - self::$collect_fail++; - } - log::error("HTTP CODE: {$http_code}"); - return false; - } - } - - // 爬取页面耗时时间 - $time_run = round(microtime(true) - $time_start, 3); - log::debug("Success download page {$url} in {$time_run} s"); - self::$collect_succ++; - - return $html; - } - - /** - * 分析提取HTML页面中的URL - * - * @param mixed $html HTML内容 - * @param mixed $collect_url 抓取的URL, 用来拼凑完整页面的URL - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_urls($html, $collect_url, $depth = 0) - { - //-------------------------------------------------------------------------------- - // 正则匹配出页面中的URL - //-------------------------------------------------------------------------------- - $urls = selector::select($html, '//a/@href'); - //preg_match_all("/ \r\n\t]{1,}/isU", $html, $matchs); - //$urls = array(); - //if (!empty($matchs[1])) - //{ - //foreach ($matchs[1] as $url) - //{ - //$urls[] = str_replace(array("\"", "'",'&'), array("",'','&'), $url); - //} - //} - - if (empty($urls)) - { - return false; - } - - // 如果页面上只有一个url,要把他转为数组,否则下面会报警告 - if (!is_array($urls)) - { - $urls = array($urls); - } - - foreach ($urls as $key=>$url) - { - $urls[$key] = str_replace(array("\"", "'",'&'), array("",'','&'), $url); - } - - //-------------------------------------------------------------------------------- - // 过滤和拼凑URL - //-------------------------------------------------------------------------------- - // 去除重复的RUL - $urls = array_unique($urls); - foreach ($urls as $k=>$url) - { - $url = trim($url); - if (empty($url)) - { - continue; - } - - $val = $this->fill_url($url, $collect_url); - if ($val) - { - $urls[$k] = $val; - } - else - { - unset($urls[$k]); - } - } - - if (empty($urls)) - { - return false; - } - - //-------------------------------------------------------------------------------- - // 把抓取到的URL放入队列 - //-------------------------------------------------------------------------------- - foreach ($urls as $url) - { - if ($this->on_fetch_url) - { - $return = call_user_func($this->on_fetch_url, $url, $this); - $url = isset($return) ? $return : $url; - unset($return); - - // 如果 on_fetch_url 返回 false,此URL不入队列 - if (!$url) - { - continue; - } - } - - // 把当前页当做找到的url的Referer页 - $options = array( - 'headers' => array( - 'Referer' => $collect_url, - ) - ); - $this->add_url($url, $options, $depth); - } - } - - /** - * 获得完整的连接地址 - * - * @param mixed $url 要检查的URL - * @param mixed $collect_url 从那个URL页面得到上面的URL - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function fill_url($url, $collect_url) - { - $url = trim($url); - $collect_url = trim($collect_url); - - // 排除JavaScript的连接 - //if (strpos($url, "javascript:") !== false) - if( preg_match("@^(javascript:|#|'|\")@i", $url) || $url == '') - { - return false; - } - // 排除没有被解析成功的语言标签 - if(substr($url, 0, 3) == '<%=') - { - return false; - } - - $parse_url = @parse_url($collect_url); - if (empty($parse_url['scheme']) || empty($parse_url['host'])) - { - return false; - } - // 过滤mailto、tel、sms、wechat、sinaweibo、weixin等协议 - if (!in_array($parse_url['scheme'], array("http", "https"))) - { - return false; - } - $scheme = $parse_url['scheme']; - $domain = $parse_url['host']; - $path = empty($parse_url['path']) ? '' : $parse_url['path']; - $base_url_path = $domain.$path; - $base_url_path = preg_replace("/\/([^\/]*)\.(.*)$/","/",$base_url_path); - $base_url_path = preg_replace("/\/$/",'',$base_url_path); - - $i = $path_step = 0; - $dstr = $pstr = ''; - $pos = strpos($url,'#'); - if($pos > 0) - { - // 去掉#和后面的字符串 - $url = substr($url, 0, $pos); - } - - // 京东变态的都是 //www.jd.com/111.html - if(substr($url, 0, 2) == '//') - { - $url = str_replace("//", "", $url); - } - // /1234.html - elseif($url[0] == '/') - { - $url = $domain.$url; - } - // ./1234.html、../1234.html 这种类型的 - elseif($url[0] == '.') - { - if(!isset($url[2])) - { - return false; - } - else - { - $urls = explode('/',$url); - foreach($urls as $u) - { - if( $u == '..' ) - { - $path_step++; - } - // 遇到 ., 不知道为什么不直接写$u == '.', 貌似一样的 - else if( $i < count($urls)-1 ) - { - $dstr .= $urls[$i].'/'; - } - else - { - $dstr .= $urls[$i]; - } - $i++; - } - $urls = explode('/',$base_url_path); - if(count($urls) <= $path_step) - { - return false; - } - else - { - $pstr = ''; - for($i=0;$i - * @created time :2016-11-05 18:58 - */ - public function link_compress($link) - { - if (empty($link['url_type'])) - { - unset($link['url_type']); - } - - if (empty($link['method']) || strtolower($link['method']) == 'get') - { - unset($link['method']); - } - - if (empty($link['headers'])) - { - unset($link['headers']); - } - - if (empty($link['params'])) - { - unset($link['params']); - } - - if (empty($link['context_data'])) - { - unset($link['context_data']); - } - - if (empty($link['proxies'])) - { - unset($link['proxies']); - } - - if (empty($link['try_num'])) - { - unset($link['try_num']); - } - - if (empty($link['max_try'])) - { - unset($link['max_try']); - } - - if (empty($link['depth'])) - { - unset($link['depth']); - } - //$json = json_encode($link); - //$json = gzdeflate($json); - return $link; - } - - /** - * 连接对象解压缩 - * - * @param mixed $link - * @return void - * @author seatle - * @created time :2016-11-05 18:58 - */ - public function link_uncompress($link) - { - $link = array( - 'url' => isset($link['url']) ? $link['url'] : '', - 'url_type' => isset($link['url_type']) ? $link['url_type'] : '', - 'method' => isset($link['method']) ? $link['method'] : 'get', - 'headers' => isset($link['headers']) ? $link['headers'] : array(), - 'params' => isset($link['params']) ? $link['params'] : array(), - 'context_data' => isset($link['context_data']) ? $link['context_data'] : '', - 'proxies' => isset($link['proxies']) ? $link['proxies'] : self::$configs['proxies'], - 'try_num' => isset($link['try_num']) ? $link['try_num'] : 0, - 'max_try' => isset($link['max_try']) ? $link['max_try'] : self::$configs['max_try'], - 'depth' => isset($link['depth']) ? $link['depth'] : 0, - ); - - return $link; - } - - /** - * 分析提取HTML页面中的字段 - * - * @param mixed $html - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_html_fields($html, $url, $page) - { - $fields = $this->get_fields(self::$configs['fields'], $html, $url, $page); - - if (!empty($fields)) - { - if ($this->on_extract_page) - { - $return = call_user_func($this->on_extract_page, $page, $fields); - if (!isset($return)) - { - log::warn("on_extract_page return value can't be empty"); - } - elseif (!is_array($return)) - { - log::warn("on_extract_page return value must be an array"); - } - else - { - $fields = $return; - } - } - - if (isset($fields) && is_array($fields)) - { - $fields_num = $this->incr_fields_num(); - if (self::$configs['max_fields'] != 0 && $fields_num > self::$configs['max_fields']) - { - exit(0); - } - - if (version_compare(PHP_VERSION,'5.4.0','<')) - { - $fields_str = json_encode($fields); - $fields_str = preg_replace_callback( "#\\\u([0-9a-f]{4})#i", function($matchs) { - return iconv('UCS-2BE', 'UTF-8', pack('H4', $matchs[1])); - }, $fields_str ); - } - else - { - $fields_str = json_encode($fields, JSON_UNESCAPED_UNICODE); - } - - if (util::is_win()) - { - $fields_str = mb_convert_encoding($fields_str, 'gb2312', 'utf-8'); - } - log::info("Result[{$fields_num}]: ".$fields_str); - - // 如果设置了导出选项 - if (!empty(self::$configs['export'])) - { - self::$export_type = isset(self::$configs['export']['type']) ? self::$configs['export']['type'] : ''; - if (self::$export_type == 'csv') - { - util::put_file(self::$export_file, util::format_csv($fields)."\n", FILE_APPEND); - } - elseif (self::$export_type == 'sql') - { - $sql = db::insert(self::$export_table, $fields, true); - util::put_file(self::$export_file, $sql.";\n", FILE_APPEND); - } - elseif (self::$export_type == 'db') - { - db::insert(self::$export_table, $fields); - } - } - } - } - } - - /** - * 根据配置提取HTML代码块中的字段 - * - * @param mixed $confs - * @param mixed $html - * @param mixed $page - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_fields($confs, $html, $url, $page) - { - $fields = array(); - foreach ($confs as $conf) - { - // 当前field抽取到的内容是否是有多项 - $repeated = isset($conf['repeated']) && $conf['repeated'] ? true : false; - // 当前field抽取到的内容是否必须有值 - $required = isset($conf['required']) && $conf['required'] ? true : false; - - if (empty($conf['name'])) - { - log::error("The field name is null, please check your \"fields\" and add the name of the field\n"); - exit; - } - - $values = array(); - // 如果定义抽取规则 - if (!empty($conf['selector'])) - { - // 如果这个field是上一个field的附带连接 - if (isset($conf['source_type']) && $conf['source_type']=='attached_url') - { - // 取出上个field的内容作为连接, 内容分页是不进队列直接下载网页的 - if (!empty($fields[$conf['attached_url']])) - { - $collect_url = $this->fill_url($fields[$conf['attached_url']], $url); - //log::debug("Find attached content page: {$collect_url}"); - $link['url'] = $collect_url; - $link = $this->link_uncompress($link); - requests::$input_encoding = null; - $html = $this->request_url($collect_url, $link); - // 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理. - if ($this->on_download_attached_page) - { - $return = call_user_func($this->on_download_attached_page, $html, $this); - if (isset($return)) - { - $html = $return; - } - } - - // 请求获取完分页数据后把连接删除了 - unset($fields[$conf['attached_url']]); - } - } - - // 没有设置抽取规则的类型 或者 设置为 xpath - if (!isset($conf['selector_type']) || $conf['selector_type']=='xpath') - { - $values = $this->get_fields_xpath($html, $conf['selector'], $conf['name']); - } - elseif ($conf['selector_type']=='css') - { - $values = $this->get_fields_css($html, $conf['selector'], $conf['name']); - } - elseif ($conf['selector_type']=='regex') - { - $values = $this->get_fields_regex($html, $conf['selector'], $conf['name']); - } - - // field不为空而且存在子配置 - if (!empty($values) && !empty($conf['children'])) - { - $child_values = array(); - // 父项抽取到的html作为子项的提取内容 - foreach ($values as $child_html) - { - // 递归调用本方法, 所以多少子项目都支持 - $child_value = $this->get_fields($conf['children'], $child_html, $url, $page); - if (!empty($child_value)) - { - $child_values[] = $child_value; - } - } - // 有子项就存子项的数组, 没有就存HTML代码块 - if (!empty($child_values)) - { - $values = $child_values; - } - } - } - - if (empty($values)) - { - // 如果值为空而且值设置为必须项, 跳出foreach循环 - if ($required) - { - // 清空整个 fields - $fields = array(); - break; - } - // 避免内容分页时attached_url拼接时候string + array了 - $fields[$conf['name']] = ''; - //$fields[$conf['name']] = array(); - } - else - { - if (is_array($values)) - { - if ($repeated) - { - $fields[$conf['name']] = $values; - } - else - { - $fields[$conf['name']] = $values[0]; - } - } - else - { - $fields[$conf['name']] = $values; - } - // 不重复抽取则只取第一个元素 - //$fields[$conf['name']] = $repeated ? $values : $values[0]; - } - } - - if (!empty($fields)) - { - foreach ($fields as $fieldname => $data) - { - $pattern = "/ \r\n\t]{1,}/isU"; - /*$pattern = "//i"; */ - // 在抽取到field内容之后调用, 对其中包含的img标签进行回调处理 - if ($this->on_handle_img && preg_match($pattern, $data)) - { - $return = call_user_func($this->on_handle_img, $fieldname, $data); - if (!isset($return)) - { - log::warn("on_handle_img return value can't be empty\n"); - } - else - { - // 有数据才会执行 on_handle_img 方法, 所以这里不要被替换没了 - $data = $return; - } - } - - // 当一个field的内容被抽取到后进行的回调, 在此回调中可以对网页中抽取的内容作进一步处理 - if ($this->on_extract_field) - { - $return = call_user_func($this->on_extract_field, $fieldname, $data, $page); - if (!isset($return)) - { - log::warn("on_extract_field return value can't be empty\n"); - } - else - { - // 有数据才会执行 on_extract_field 方法, 所以这里不要被替换没了 - $fields[$fieldname] = $return; - } - } - } - } - - return $fields; - } - - /** - * 验证导出 - * - * @return void - * @author seatle - * @created time :2016-10-02 23:37 - */ - public function check_export() - { - // 如果设置了导出选项 - if (!empty(self::$configs['export'])) - { - if (self::$export_type == 'csv') - { - if (empty(self::$export_file)) - { - log::error("Export data into CSV files need to Set the file path."); - exit; - } - } - elseif (self::$export_type == 'sql') - { - if (empty(self::$export_file)) - { - log::error("Export data into SQL files need to Set the file path."); - exit; - } - } - elseif (self::$export_type == 'db') - { - if (!function_exists('mysqli_connect')) - { - log::error("Export data to a database need Mysql support, Error: Unable to load mysqli extension."); - exit; - } - - if (empty(self::$db_config)) - { - log::error("Export data to a database need Mysql support, Error: You not set a config array for connect."); - exit; - } - - $config = self::$db_config; - @mysqli_connect($config['host'], $config['user'], $config['pass'], $config['name'], $config['port']); - if(mysqli_connect_errno()) - { - log::error("Export data to a database need Mysql support, Error: ".mysqli_connect_error()); - exit; - } - - db::set_connect('default', $config); - db::_init(); - - if (!db::table_exists(self::$export_table)) - { - log::error("Table ".self::$export_table." does not exist"); - exit; - } - } - } - } - - public function check_cache() - { - if (!self::$use_redis || self::$save_running_state) - { - return false; - } - - //if (queue::exists("collect_queue")) - $keys = queue::keys("*"); - $count = count($keys); - if ($count != 0) - { - // After this operation, 4,318 kB of additional disk space will be used. - // Do you want to continue? [Y/n] - //$msg = "发现Redis中有采集数据, 是否继续执行, 不继续则清空Redis数据重新采集\n"; - $msg = "Found that the data of Redis, no continue will empty Redis data start again\n"; - $msg .= "Do you want to continue? [Y/n]"; - fwrite(STDOUT, $msg); - $arg = strtolower(trim(fgets(STDIN))); - $arg = empty($arg) || !in_array($arg, array('y','n')) ? 'y' : $arg; - if ($arg == 'n') - { - foreach ($keys as $key) - { - $key = str_replace($GLOBALS['config']['redis']['prefix'].":", "", $key); - queue::del($key); - } - } - } - } - - public function init_redis() - { - if (!self::$use_redis) - { - return false; - } - - // 添加当前服务器到服务器列表 - $this->add_server_list(self::$serverid, self::$tasknum); - - // 删除当前服务器的任务状态 - // 对于被强制退出的进程有用 - for ($i = 1; $i <= self::$tasknum; $i++) - { - $this->del_task_status(self::$serverid, $i); - } - } - - /** - * 设置任务状态, 主进程和子进程每成功采集一个页面后调用 - * - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public function set_task_status() - { - // 每采集成功一个页面, 生成当前进程状态到文件, 供主进程使用 - $mem = round(memory_get_usage(true)/(1024*1024),2); - $use_time = microtime(true) - self::$time_start; - $speed = round((self::$collect_succ + self::$collect_fail) / $use_time, 2); - $status = array( - 'id' => self::$taskid, - 'pid' => self::$taskpid, - 'mem' => $mem, - 'collect_succ' => self::$collect_succ, - 'collect_fail' => self::$collect_fail, - 'speed' => $speed, - ); - $task_status = json_encode($status); - - if (self::$use_redis) - { - $key = "server-".self::$serverid."-task_status-".self::$taskid; - queue::set($key, $task_status); - } - else - { - self::$task_status = array($task_status); - } - } - - /** - * 删除任务状态 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function del_task_status($serverid, $taskid) - { - if (!self::$use_redis) - { - return false; - } - $key = "server-{$serverid}-task_status-{$taskid}"; - queue::del($key); - } - - /** - * 获得任务状态, 主进程才会调用 - * - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public function get_task_status($serverid, $taskid) - { - if (!self::$use_redis) - { - return false; - } - - $key = "server-{$serverid}-task_status-{$taskid}"; - $task_status = queue::get($key); - return $task_status; - } - - /** - * 获得任务状态, 主进程才会调用 - * - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public function get_task_status_list($serverid = 1, $tasknum) - { - $task_status = array(); - if (self::$use_redis) - { - for ($i = 1; $i <= $tasknum; $i++) - { - $key = "server-{$serverid}-task_status-".$i; - $task_status[] = queue::get($key); - } - } - else - { - $task_status = self::$task_status; - } - return $task_status; - } - - /** - * 添加当前服务器信息到服务器列表 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function add_server_list($serverid, $tasknum) - { - if (!self::$use_redis) - { - return false; - } - - // 更新服务器列表 - $server_list_json = queue::get("server_list"); - $server_list = array(); - if (!$server_list_json) - { - $server_list[$serverid] = array( - 'serverid' => $serverid, - 'tasknum' => $tasknum, - 'time' => time(), - ); - } - else - { - $server_list = json_decode($server_list_json, true); - $server_list[$serverid] = array( - 'serverid' => $serverid, - 'tasknum' => $tasknum, - 'time' => time(), - ); - ksort($server_list); - } - queue::set("server_list", json_encode($server_list)); - } - - /** - * 从服务器列表中删除当前服务器信息 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function del_server_list($serverid) - { - if (!self::$use_redis) - { - return false; - } - - $server_list_json = queue::get("server_list"); - $server_list = array(); - if ($server_list_json) - { - $server_list = json_decode($server_list_json, true); - if (isset($server_list[$serverid])) - { - unset($server_list[$serverid]); - } - - // 删除完当前的任务列表如果还存在,就更新一下Redis - if (!empty($server_list)) - { - ksort($server_list); - queue::set("server_list", json_encode($server_list)); - } - } - } - - /** - * 获取等待爬取页面数量 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_collect_url_num() - { - if (self::$use_redis) - { - $count = queue::get("collect_urls_num"); - } - else - { - $count = self::$collect_urls_num; - } - return $count; - } - - /** - * 获取已经爬取页面数量 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_collected_url_num() - { - if (self::$use_redis) - { - $count = queue::get("collected_urls_num"); - } - else - { - $count = self::$collected_urls_num; - } - return $count; - } - - /** - * 已采集页面数量加一 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function incr_collected_url_num($url) - { - if (self::$use_redis) - { - queue::incr("collected_urls_num"); - } - else - { - self::$collected_urls_num++; - } - } - - /** - * 从队列左边插入 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_lpush($link = array(), $allowed_repeat = false) - { - if (empty($link) || empty($link['url'])) - { - return false; - } - - $url = $link['url']; - $link = $this->link_compress($link); - - $status = false; - if (self::$use_redis) - { - $key = "collect_urls-".md5($url); - $lock = "lock-".$key; - // 加锁: 一个进程一个进程轮流处理 - if (queue::lock($lock)) - { - $exists = queue::exists($key); - // 不存在或者当然URL可重复入 - if (!$exists || $allowed_repeat) - { - // 待爬取网页记录数加一 - queue::incr("collect_urls_num"); - // 先标记为待爬取网页 - queue::set($key, time()); - // 入队列 - $link = json_encode($link); - queue::lpush("collect_queue", $link); - $status = true; - } - // 解锁 - queue::unlock($lock); - } - } - else - { - $key = md5($url); - if (!array_key_exists($key, self::$collect_urls)) - { - self::$collect_urls_num++; - self::$collect_urls[$key] = time(); - array_push(self::$collect_queue, $link); - $status = true; - } - } - return $status; - } - - /** - * 从队列右边插入 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_rpush($link = array(), $allowed_repeat = false) - { - if (empty($link) || empty($link['url'])) - { - return false; - } - - $url = $link['url']; - - $status = false; - if (self::$use_redis) - { - $key = "collect_urls-".md5($url); - $lock = "lock-".$key; - // 加锁: 一个进程一个进程轮流处理 - if (queue::lock($lock)) - { - $exists = queue::exists($key); - // 不存在或者当然URL可重复入 - if (!$exists || $allowed_repeat) - { - // 待爬取网页记录数加一 - queue::incr("collect_urls_num"); - // 先标记为待爬取网页 - queue::set($key, time()); - // 入队列 - $link = json_encode($link); - queue::rpush("collect_queue", $link); - $status = true; - } - // 解锁 - queue::unlock($lock); - } - } - else - { - $key = md5($url); - if (!array_key_exists($key, self::$collect_urls)) - { - self::$collect_urls_num++; - self::$collect_urls[$key] = time(); - array_unshift(self::$collect_queue, $link); - $status = true; - } - } - return $status; - } - - /** - * 从队列左边取出 - * 后进先出 - * 可以避免采集内容页有分页的时候采集失败数据拼凑不全 - * 还可以按顺序采集列表页 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_lpop() - { - if (self::$use_redis) - { - $link = queue::lpop("collect_queue"); - $link = json_decode($link, true); - } - else - { - $link = array_pop(self::$collect_queue); - } - return $link; - } - - /** - * 从队列右边取出 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_rpop() - { - if (self::$use_redis) - { - $link = queue::rpop("collect_queue"); - $link = json_decode($link, true); - } - else - { - $link = array_shift(self::$collect_queue); - } - return $link; - } - - /** - * 队列长度 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_lsize() - { - if (self::$use_redis) - { - $lsize = queue::lsize("collect_queue"); - } - else - { - $lsize = count(self::$collect_queue); - } - return $lsize; - } - - /** - * 采集深度加一 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function incr_depth_num($depth) - { - if (self::$use_redis) - { - $lock = "lock-depth_num"; - // 锁2秒 - if (queue::lock($lock, time(), 2)) - { - if (queue::get("depth_num") < $depth) - { - queue::set("depth_num", $depth); - } - - queue::unlock($lock); - } - } - else - { - if (self::$depth_num < $depth) - { - self::$depth_num = $depth; - } - } - } - - /** - * 获得采集深度 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_depth_num() - { - if (self::$use_redis) - { - $depth_num = queue::get("depth_num"); - return $depth_num ? $depth_num : 0; - } - else - { - return self::$depth_num; - } - } - - /** - * 提取到的field数目加一 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function incr_fields_num() - { - if (self::$use_redis) - { - $fields_num = queue::incr("fields_num"); - } - else - { - self::$fields_num++; - $fields_num = self::$fields_num; - } - return $fields_num; - } - - /** - * 提取到的field数目 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_fields_num() - { - if (self::$use_redis) - { - $fields_num = queue::get("fields_num"); - } - else - { - $fields_num = self::$fields_num; - } - return $fields_num ? $fields_num : 0; - } - - /** - * 采用xpath分析提取字段 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_fields_xpath($html, $selector, $fieldname) - { - $result = selector::select($html, $selector); - if (selector::$error) - { - log::error("Field(\"{$fieldname}\") ".selector::$error."\n"); - } - return $result; - } - - /** - * 采用正则分析提取字段 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_fields_regex($html, $selector, $fieldname) - { - $result = selector::select($html, $selector, 'regex'); - if (selector::$error) - { - log::error("Field(\"{$fieldname}\") ".selector::$error."\n"); - } - return $result; - } - - /** - * 采用CSS选择器提取字段 - * - * @param mixed $html - * @param mixed $selector - * @param mixed $fieldname - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_fields_css($html, $selector, $fieldname) - { - $result = selector::select($html, $selector, 'css'); - if (selector::$error) - { - log::error("Field(\"{$fieldname}\") ".selector::$error."\n"); - } - return $result; - } - - /** - * 清空shell输出内容 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function clear_echo() - { - $arr = array(27, 91, 72, 27, 91, 50, 74); - foreach ($arr as $a) - { - print chr($a); - } - //array_map(create_function('$a', 'print chr($a);'), array(27, 91, 72, 27, 91, 50, 74)); - } - - /** - * 替换shell输出内容 - * - * @param mixed $message - * @param mixed $force_clear_lines - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function replace_echo($message, $force_clear_lines = NULL) - { - static $last_lines = 0; - - if(!is_null($force_clear_lines)) - { - $last_lines = $force_clear_lines; - } - - // 获取终端宽度 - $toss = $status = null; - $term_width = exec('tput cols', $toss, $status); - if($status || empty($term_width)) - { - $term_width = 64; // Arbitrary fall-back term width. - } - - $line_count = 0; - foreach(explode("\n", $message) as $line) - { - $line_count += count(str_split($line, $term_width)); - } - - // Erasure MAGIC: Clear as many lines as the last output had. - for($i = 0; $i < $last_lines; $i++) - { - // Return to the beginning of the line - echo "\r"; - // Erase to the end of the line - echo "\033[K"; - // Move cursor Up a line - echo "\033[1A"; - // Return to the beginning of the line - echo "\r"; - // Erase to the end of the line - echo "\033[K"; - // Return to the beginning of the line - echo "\r"; - // Can be consolodated into - // echo "\r\033[K\033[1A\r\033[K\r"; - } - - $last_lines = $line_count; - - echo $message."\n"; - } - - /** - * 展示启动界面, Windows 不会到这里来 - * @return void - */ - public function display_ui() - { - $loadavg = sys_getloadavg(); - foreach ($loadavg as $k=>$v) - { - $loadavg[$k] = round($v, 2); - } - $display_str = "\033[1A\n\033[K-----------------------------\033[47;30m PHPSPIDER \033[0m-----------------------------\n\033[0m"; - //$display_str = "-----------------------------\033[47;30m PHPSPIDER \033[0m-----------------------------\n\033[0m"; - $run_time_str = util::time2second(time()-self::$time_start, false); - $display_str .= 'PHPSpider version:' . self::VERSION . " PHP version:" . PHP_VERSION . "\n"; - $display_str .= 'start time:'. date('Y-m-d H:i:s', self::$time_start).' run ' . $run_time_str . " \n"; - - $display_str .= 'spider name: ' . self::$configs['name'] . "\n"; - if (self::$multiserver) - { - $display_str .= 'server id: ' . self::$serverid."\n"; - } - $display_str .= 'task number: ' . self::$tasknum . "\n"; - $display_str .= 'load average: ' . implode(", ", $loadavg) . "\n"; - $display_str .= "document: https://doc.phpspider.org\n"; - - $display_str .= $this->display_task_ui(); - - if (self::$multiserver) - { - $display_str .= $this->display_server_ui(); - } - - $display_str .= $this->display_collect_ui(); - - // 清屏 - //$this->clear_echo(); - // 返回到第一行,第一列 - //echo "\033[0;0H"; - $display_str .= "---------------------------------------------------------------------\n"; - $display_str .= "Press Ctrl-C to quit. Start success."; - if (self::$terminate) - { - $display_str .= "\n\033[33mWait for the process exits...\033[0m"; - } - //echo $display_str; - $this->replace_echo($display_str); - } - - public function display_task_ui() - { - $display_str = "-------------------------------\033[47;30m TASKS \033[0m-------------------------------\n"; - - $display_str .= "\033[47;30mtaskid\033[0m". str_pad('', self::$taskid_length+2-strlen('taskid')). - "\033[47;30mtaskpid\033[0m". str_pad('', self::$pid_length+2-strlen('taskpid')). - "\033[47;30mmem\033[0m". str_pad('', self::$mem_length+2-strlen('mem')). - "\033[47;30mcollect succ\033[0m". str_pad('', self::$urls_length-strlen('collect succ')). - "\033[47;30mcollect fail\033[0m". str_pad('', self::$urls_length-strlen('collect fail')). - "\033[47;30mspeed\033[0m". str_pad('', self::$speed_length+2-strlen('speed')). - "\n"; - - // "\033[32;40m [OK] \033[0m" - $task_status = $this->get_task_status_list(self::$serverid, self::$tasknum); - foreach ($task_status as $json) - { - $task = json_decode($json, true); - if (empty($task)) - { - continue; - } - $display_str .= str_pad($task['id'], self::$taskid_length+2). - str_pad($task['pid'], self::$pid_length+2). - str_pad($task['mem']."MB", self::$mem_length+2). - str_pad($task['collect_succ'], self::$urls_length). - str_pad($task['collect_fail'], self::$urls_length). - str_pad($task['speed']."/s", self::$speed_length+2). - "\n"; - } - //echo "\033[9;0H"; - return $display_str; - } - - public function display_server_ui() - { - $display_str = "-------------------------------\033[47;30m SERVER \033[0m------------------------------\n"; - - $display_str .= "\033[47;30mserver\033[0m". str_pad('', self::$server_length+2-strlen('serverid')). - "\033[47;30mtasknum\033[0m". str_pad('', self::$tasknum_length+2-strlen('tasknum')). - "\033[47;30mmem\033[0m". str_pad('', self::$mem_length+2-strlen('mem')). - "\033[47;30mcollect succ\033[0m". str_pad('', self::$urls_length-strlen('collect succ')). - "\033[47;30mcollect fail\033[0m". str_pad('', self::$urls_length-strlen('collect fail')). - "\033[47;30mspeed\033[0m". str_pad('', self::$speed_length+2-strlen('speed')). - "\n"; - - $server_list_json = queue::get("server_list"); - $server_list = json_decode($server_list_json, true); - foreach ($server_list as $server) - { - $serverid = $server['serverid']; - $tasknum = $server['tasknum']; - $mem = 0; - $speed = 0; - $collect_succ = $collect_fail = 0; - $task_status = $this->get_task_status_list($serverid, $tasknum); - foreach ($task_status as $json) - { - $task = json_decode($json, true); - if (empty($task)) - { - continue; - } - $mem += $task['mem']; - $speed += $task['speed']; - $collect_fail += $task['collect_fail']; - $collect_succ += $task['collect_succ']; - } - - $display_str .= str_pad($serverid, self::$server_length). - str_pad($tasknum, self::$tasknum_length+2). - str_pad($mem."MB", self::$mem_length+2). - str_pad($collect_succ, self::$urls_length). - str_pad($collect_fail, self::$urls_length). - str_pad($speed."/s", self::$speed_length+2). - "\n"; - } - return $display_str; - } - - public function display_collect_ui() - { - $display_str = "---------------------------\033[47;30m COLLECT STATUS \033[0m--------------------------\n"; - - $display_str .= "\033[47;30mfind pages\033[0m". str_pad('', 16-strlen('find pages')). - "\033[47;30mqueue\033[0m". str_pad('', 14-strlen('queue')). - "\033[47;30mcollected\033[0m". str_pad('', 15-strlen('collected')). - "\033[47;30mfields\033[0m". str_pad('', 15-strlen('fields')). - "\033[47;30mdepth\033[0m". str_pad('', 12-strlen('depth')). - "\n"; - - $collect = $this->get_collect_url_num(); - $collected = $this->get_collected_url_num(); - $queue = $this->queue_lsize(); - $fields = $this->get_fields_num(); - $depth = $this->get_depth_num(); - $display_str .= str_pad($collect, 16); - $display_str .= str_pad($queue, 14); - $display_str .= str_pad($collected, 15); - $display_str .= str_pad($fields, 15); - $display_str .= str_pad($depth, 12); - $display_str .= "\n"; - return $display_str; - } - - /** - * 判断是否附件文件 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - //public function is_attachment_file($url) - //{ - //$mime_types = $GLOBALS['config']['mimetype']; - //$mime_types_flip = array_flip($mime_types); - - //$pathinfo = pathinfo($url); - //$fileext = isset($pathinfo['extension']) ? $pathinfo['extension'] : ''; - - //$fileinfo = array(); - //// 存在文件后缀并且是配置里面的后缀 - //if (!empty($fileext) && isset($mime_types_flip[$fileext])) - //{ - //stream_context_set_default( - //array( - //'http' => array( - //'method' => 'HEAD' - //) - //) - //); - //// 代理和Cookie以后实现, 方法和 file_get_contents 一样 使用 stream_context_create 设置 - //$headers = get_headers($url, 1); - //if (strpos($headers[0], '302')) - //{ - //$url = $headers['Location']; - //$headers = get_headers($url, 1); - //} - ////print_r($headers); - //$fileinfo = array( - //'basename' => isset($pathinfo['basename']) ? $pathinfo['basename'] : '', - //'filename' => isset($pathinfo['filename']) ? $pathinfo['filename'] : '', - //'fileext' => isset($pathinfo['extension']) ? $pathinfo['extension'] : '', - //'filesize' => isset($headers['Content-Length']) ? $headers['Content-Length'] : 0, - //'atime' => isset($headers['Date']) ? strtotime($headers['Date']) : time(), - //'mtime' => isset($headers['Last-Modified']) ? strtotime($headers['Last-Modified']) : time(), - //); - - //$mime_type = 'html'; - //$content_type = isset($headers['Content-Type']) ? $headers['Content-Type'] : ''; - //if (!empty($content_type)) - //{ - //$mime_type = isset($GLOBALS['config']['mimetype'][$content_type]) ? $GLOBALS['config']['mimetype'][$content_type] : $mime_type; - //} - //$mime_types_flip = array_flip($mime_types); - //// 判断一下是不是文件名被加什么后缀了, 比如 http://www.xxxx.com/test.jpg?token=xxxxx - //if (!isset($mime_types_flip[$fileinfo['fileext']])) - //{ - //$fileinfo['fileext'] = $mime_type; - //$fileinfo['basename'] = $fileinfo['filename'].'.'.$mime_type; - //} - //} - //return $fileinfo; - //} - -} - - diff --git a/vendor/owner888/phpspider/core/phpspider.php b/vendor/owner888/phpspider/core/phpspider.php deleted file mode 100644 index 0652c0b..0000000 --- a/vendor/owner888/phpspider/core/phpspider.php +++ /dev/null @@ -1,3598 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider核心类文件 -// *********** -// 泛域名抓取优化版 BY KEN a-site@foxmail.com -// *********** -// * 泛域名设置:domain = array('*') -// * 增加子域名数量限制 $max_sub_num = 100 -//---------------------------------- - -namespace phpspider\core; - -require_once __DIR__.'/constants.php'; - -use Exception; -use phpspider\core\db; -use phpspider\core\log; -use phpspider\core\queue; -use phpspider\core\requests; -use phpspider\core\selector; -use phpspider\core\util; - -// 启动的时候生成data目录 -util::path_exists(PATH_DATA); -util::path_exists(PATH_DATA.'/lock'); -util::path_exists(PATH_DATA.'/log'); -util::path_exists(PATH_DATA.'/cache'); -util::path_exists(PATH_DATA.'/status'); - -class phpspider -{ - /** - * 版本号 - * @var string - */ - const VERSION = '2.1.5'; - - /** - * 爬虫爬取每个网页的时间间隔,0表示不延时, 单位: 毫秒 - */ - const INTERVAL = 100; - - /** - * 爬虫爬取每个网页的超时时间, 单位: 秒 - */ - const TIMEOUT = 5; - - /** - * 爬取失败次数, 不想失败重新爬取则设置为0 - */ - const MAX_TRY = 0; - - /** - * 爬虫爬取网页所使用的浏览器类型: pc/Mac、ios、android - * 默认类型是PC - */ - const AGENT_PC = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'; - const AGENT_IOS = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_3 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G34 Safari/601.1'; - const AGENT_ANDROID = 'Mozilla/5.0 (Linux; U; Android 6.0.1;zh_cn; Le X820 Build/FEXCNFN5801507014S) AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 Chrome/49.0.0.0 Mobile Safari/537.36 EUI Browser/5.8.015S'; - - /** - * pid文件的路径及名称 - * @var string - */ - //public static $pid_file = ''; - - /** - * 日志目录, 默认在data根目录下 - * @var mixed - */ - //public static $log_file = ''; - - /** - * 主任务进程ID - */ - //public static $master_pid = 0; - - /** - * 所有任务进程ID - */ - //public static $taskpids = array(); - - /** - * Daemonize. - * - * @var bool - */ - public static $daemonize = false; - - /** - * 当前进程是否终止 - */ - public static $terminate = false; - - /** - * 是否分布式 - */ - public static $multiserver = false; - - /** - * 当前服务器ID - */ - public static $serverid = 1; - - /** - * 主任务进程 - */ - public static $taskmaster = true; - - /** - * 当前任务ID - */ - public static $taskid = 1; - - /** - * 当前任务进程ID - */ - public static $taskpid = 1; - - /** - * 并发任务数 - */ - public static $tasknum = 1; - - /** - * 生成 - */ - public static $fork_task_complete = false; - - /** - * 是否使用Redis - */ - public static $use_redis = false; - - /** - * 是否保存爬虫运行状态 - */ - public static $save_running_state = false; - - /** - * 配置 - */ - public static $configs = array(); - - /** - * 要抓取的URL队列 - md5(url) => array( - 'url' => '', // 要爬取的URL - 'url_type' => '', // 要爬取的URL类型,scan_page、list_page、content_page - 'method' => 'get', // 默认为"GET"请求, 也支持"POST"请求 - 'headers' => array(), // 此url的Headers, 可以为空 - 'params' => array(), // 发送请求时需添加的参数, 可以为空 - 'context_data'=> '', // 此url附加的数据, 可以为空 - 'proxy' => false, // 是否使用代理 - 'try_num' => 0 // 抓取次数 - 'max_try' => 0 // 允许抓取失败次数 - ) - */ - public static $collect_queue = array(); - - /** - * 要抓取的URL数组 - * md5($url) => time() - */ - public static $collect_urls = array(); - - /** - * 要抓取的URL数量 - */ - public static $collect_urls_num = 0; - - /** - * 已经抓取的URL数量 - */ - public static $collected_urls_num = 0; - - /** - * 当前进程采集成功数 - */ - public static $collect_succ = 0; - - /** - * 当前进程采集失败数 - */ - public static $collect_fail = 0; - - /** - * 提取到的字段数 - */ - public static $fields_num = 0; - - /** - * 【KEN】提取到的页面数按域名计数容器 结构为 domain => number - */ - public static $pages_num = array(); - - /** - * 【KEN】单域名允许抓取的最大页面数,0为不限制 - */ - public static $max_pages = 0; - - /** - * 【KEN】花费的抓取时长计数容器 结构为 domain => number - */ - public static $duration = array(); - - /** - * 【KEN】单域名允许抓取的最大时长,单位秒,0为不限制 - */ - public static $max_duration = 0; - - /** - * 【KEN】单域名最大子域名发现数量 防止掉进蜘蛛池,推荐值:3000(多数大型网站上限) - */ - public static $max_sub_num = 3000; //建议值 3000 - - /** - * 【KEN】子进程未获取任务,超时退出前,等待计时器 - */ - - public static $stand_by_time = 0; - - /** - * 【KEN】子进程未获取任务,超时退出前,最大等待时长/秒,全部任务束后,子进程将会等待的时间,以便有缓冲时间,获得新的任务 - */ - public static $max_stand_by_time = 60; //建议值 60 - - /** - * 【KEN】每个主机并发上限,降低对方网站流量压力和减少被阻挡概率,建议值 6 ,须与 queue_order = rand 一起使用 - */ - public static $max_task_per_host = 0; //0值和非0值会使用不同类型的队列缓存库,从0改为非0值或从非0值改为0需清空队列缓存库再运行,否则任务无法添加 - public static $task_per_host_counter = array(); //计数容器 - - /** - * 采集深度 - */ - public static $depth_num = 0; - - /** - * 爬虫开始时间 - */ - public static $time_start = 0; - - /** - * 任务状态 - */ - public static $task_status = array(); - - // 导出类型配置 - public static $export_type = ''; - public static $export_file = ''; - public static $export_conf = ''; - public static $export_table = ''; - - // 数据库配置 - public static $db_config = array(); - // 队列配置 - public static $queue_config = array(); - - // 运行面板参数长度 - public static $server_length = 10; - public static $tasknum_length = 8; - public static $taskid_length = 8; - public static $pid_length = 8; - public static $mem_length = 8; - public static $urls_length = 15; - public static $speed_length = 6; - - /** - * 爬虫初始化时调用, 用来指定一些爬取前的操作 - * - * @var mixed - * @access public - */ - public $on_start = null; - - /** - * URL采集前调用 - * 比如有时需要根据某个特定的URL,来决定这次的请求是否使用代理 / 或使用哪个代理 - * - * @var mixed - * @access public - */ - public $on_before_download_page = null; - - /** - * 网页状态码回调 - * - * @var mixed - * @access public - */ - public $on_status_code = null; - - /** - * 判断当前网页是否被反爬虫, 需要开发者实现 - * - * @var mixed - * @access public - */ - public $is_anti_spider = null; - - /** - * 在一个网页下载完成之后调用, 主要用来对下载的网页进行处理 - * - * @var mixed - * @access public - */ - public $on_download_page = null; - - /** - * 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理 - * - * @var mixed - * @access public - */ - public $on_download_attached_page = null; - - /** - * 当前页面抽取到URL - * - * @var mixed - * @access public - */ - public $on_fetch_url = null; - - /** - * URL属于入口页 - * 在爬取到入口url的内容之后, 添加新的url到待爬队列之前调用 - * 主要用来发现新的待爬url, 并且能给新发现的url附加数据 - * - * @var mixed - * @access public - */ - public $on_scan_page = null; - - /** - * URL属于列表页 - * 在爬取到列表页url的内容之后, 添加新的url到待爬队列之前调用 - * 主要用来发现新的待爬url, 并且能给新发现的url附加数据 - * - * @var mixed - * @access public - */ - public $on_list_page = null; - - /** - * URL属于内容页 - * 在爬取到内容页url的内容之后, 添加新的url到待爬队列之前调用 - * 主要用来发现新的待爬url, 并且能给新发现的url附加数据 - * - * @var mixed - * @access public - */ - public $on_content_page = null; - - /** - * 在抽取到field内容之后调用, 对其中包含的img标签进行回调处理 - * - * @var mixed - * @access public - */ - public $on_handle_img = null; - - /** - * 当一个field的内容被抽取到后进行的回调, 在此回调中可以对网页中抽取的内容作进一步处理 - * - * @var mixed - * @access public - */ - public $on_extract_field = null; - - /** - * 在一个网页的所有field抽取完成之后, 可能需要对field进一步处理, 以发布到自己的网站 - * - * @var mixed - * @access public - */ - public $on_extract_page = null; - - /** - * 如果抓取的页面是一个附件文件, 比如图片、视频、二进制文件、apk、ipad、exe - * 就不去分析他的内容提取field了, 提取field只针对HTML - * - * @var mixed - * @access public - */ - public $on_attachment_file = null; - - public function __construct($configs = array()) - { - // 产生时钟云,解决php7下面ctrl+c无法停止bug - declare(ticks = 1); - - // 先打开以显示验证报错内容 - log::$log_show = true; - log::$log_file = isset($configs['log_file']) ? $configs['log_file'] : PATH_DATA.'/phpspider.log'; - log::$log_type = isset($configs['log_type']) ? $configs['log_type'] : false; - - // 彩蛋 - $included_files = get_included_files(); - $content = file_get_contents($included_files[0]); - if (!preg_match("#/\* Do NOT delete this comment \*/#", $content) || !preg_match("#/\* 不要删除这段注释 \*/#", $content)) - { - $msg = "Unknown error..."; - log::error($msg); - exit; - } - - $configs['name'] = isset($configs['name']) ? $configs['name'] : 'phpspider'; - $configs['proxy'] = isset($configs['proxy']) ? $configs['proxy'] : false; - $configs['user_agent'] = isset($configs['user_agent']) ? $configs['user_agent'] : self::AGENT_PC; - $configs['client_ip'] = isset($configs['client_ip']) ? $configs['client_ip'] : array(); - $configs['interval'] = isset($configs['interval']) ? $configs['interval'] : self::INTERVAL; - $configs['timeout'] = isset($configs['timeout']) ? $configs['timeout'] : self::TIMEOUT; - $configs['max_try'] = isset($configs['max_try']) ? $configs['max_try'] : self::MAX_TRY; - $configs['max_depth'] = isset($configs['max_depth']) ? $configs['max_depth'] : 0; - $configs['max_fields'] = isset($configs['max_fields']) ? $configs['max_fields'] : 0; - $configs['export'] = isset($configs['export']) ? $configs['export'] : array(); - //新增参数 BY KEN - $configs['max_pages'] = isset($configs['max_pages']) ? $configs['max_pages'] : self::$max_pages; - $configs['max_duration'] = isset($configs['max_duration']) ? $configs['max_duration'] : self::$max_duration; - $configs['max_sub_num'] = isset($configs['max_sub_num']) ? $configs['max_sub_num'] : self::$max_sub_num; - $configs['max_stand_by_time'] = isset($configs['max_stand_by_time']) ? $configs['max_stand_by_time'] : self::$max_stand_by_time; - $configs['max_task_per_host'] = isset($configs['max_task_per_host']) ? $configs['max_task_per_host'] : self::$max_task_per_host; - //启用 host并发上限时,队列参数强制为随机 - if ($configs['max_task_per_host'] > 0) - { - $configs['queue_order'] = 'rand'; - } - else - { - $configs['queue_order'] = isset($configs['queue_order']) ? $configs['queue_order'] : 'list'; - } - - // csv、sql、db - self::$export_type = isset($configs['export']['type']) ? $configs['export']['type'] : ''; - self::$export_file = isset($configs['export']['file']) ? $configs['export']['file'] : ''; - self::$export_table = isset($configs['export']['table']) ? $configs['export']['table'] : ''; - self::$db_config = isset($configs['db_config']) ? $configs['db_config'] : array(); - self::$queue_config = isset($configs['queue_config']) ? $configs['queue_config'] : array(); - - // 是否设置了并发任务数, 并且大于1, 而且不是windows环境 - if (isset($configs['tasknum']) && $configs['tasknum'] > 1 && !util::is_win()) - { - self::$tasknum = $configs['tasknum']; - } - - // 是否设置了保留运行状态 - if (isset($configs['save_running_state'])) - { - self::$save_running_state = $configs['save_running_state']; - } - - // 是否分布式 - if (isset($configs['multiserver'])) - { - self::$multiserver = $configs['multiserver']; - } - - // 当前服务器ID - if (isset($configs['serverid'])) - { - self::$serverid = $configs['serverid']; - } - - // 不同项目的采集以采集名称作为前缀区分 缩短 spider name md5长度到4位,减少内存占用 - if (isset(self::$queue_config['prefix'])) - { - self::$queue_config['prefix'] = self::$queue_config['prefix'].'-'.substr(md5($configs['name']), 0, 4); - } - - self::$configs = $configs; - } - - public function get_config($name) - { - return empty(self::$configs[$name]) ? array() : self::$configs[$name]; - } - - public function add_scan_url($url, $options = array(), $allowed_repeat = true) - { - // 投递状态 - $status = false; - //限制最大子域名数量 - if ( ! empty(self::$configs['max_sub_num'])) - { - //抓取到的子域名超过指定数量,就丢掉此域名 - $sub_domain_count = $this->sub_domain_count($url); - if ($sub_domain_count > self::$configs['max_sub_num']) - { - log::debug('Task('.self::$taskid.') subdomin = '.$sub_domain_count.' more than '.self::$configs['max_sub_num'].",add_scan_url $url [Skip]"); - return $status; - } - } - - $link = $options; - $link['url'] = $url; - $link['url_type'] = 'scan_page'; - $link = $this->link_uncompress($link); - - if ($this->is_content_page($url)) - { - $link['url_type'] = 'content_page'; - $status = $this->queue_lpush($link, $allowed_repeat); - } - elseif ($this->is_list_page($url)) - { - $link['url_type'] = 'list_page'; - $status = $this->queue_lpush($link, $allowed_repeat); - } - else - { - $status = $this->queue_lpush($link, $allowed_repeat); - } - - if ($status) - { - if ($link['url_type'] == 'scan_page') - { - log::debug("Find scan page: {$url}"); - } - elseif ($link['url_type'] == 'content_page') - { - log::debug("Find content page: {$url}"); - } - elseif ($link['url_type'] == 'list_page') - { - log::debug("Find list page: {$url}"); - } - } - - return $status; - } - - /** - * 一般在 on_scan_page 和 on_list_page 回调函数中调用, 用来往待爬队列中添加url - * 两个进程同时调用这个方法, 传递相同url的时候, 就会出现url重复进入队列 - * - * @param mixed $url - * @param mixed $options - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function add_url($url, $options = array(), $depth = 0) - { - // 投递状态 - $status = false; - //限制最大子域名数量 - if ( ! empty(self::$configs['max_sub_num'])) - { - //抓取超过 max_sub_num 子域名的,就丢掉 - $sub_domain_count = $this->sub_domain_count($url); - if ($sub_domain_count > self::$configs['max_sub_num']) - { - log::debug('Task('.self::$taskid.') subdomin = '.$sub_domain_count.' more than '.self::$configs['max_sub_num'].",add_url $url [Skip]"); - //echo '[on_download_page] ' . $domain . "'s subdomin > 1000 ,Skip!\n"; - return $status; - } - } - $link = $options; - $link['url'] = $url; - $link['depth'] = $depth; - $link = $this->link_uncompress($link); - - if ($this->is_content_page($url)) - { - $link['url_type'] = 'content_page'; - $status = $this->queue_lpush($link); - } - elseif ($this->is_list_page($url)) - { - $link['url_type'] = 'list_page'; - $status = $this->queue_lpush($link); - } - - if ($status) - { - if ($link['url_type'] == 'scan_page') - { - log::debug("Find scan page: {$url}"); - } - elseif ($link['url_type'] == 'content_page') - { - log::debug("Find content page: {$url}"); - } - elseif ($link['url_type'] == 'list_page') - { - log::debug("Find list page: {$url}"); - } - } - - return $status; - } - - /** - * 是否入口页面 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-10-12 19:06 - */ - public function is_scan_page($url) - { - $parse_url = parse_url($url); - //2018-1-3 通配所有域名 - if ( ! empty($parse_url['host']) and self::$configs['domains'][0] == '*') - { - return true; - } - //限定域名 - if (empty($parse_url['host']) || ! in_array($parse_url['host'], self::$configs['domains'])) - { - return false; - } - return true; - } - - /** - * 是否列表页面 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-10-12 19:06 - */ - public function is_list_page($url) - { - $result = false; - //过滤下载类型文件 20180209 - if (preg_match('/\.(zip|7z|cab|rar|iso|gho|jar|ace|tar|gz|bz2|z|xml|pdf|doc|txt|rtf|snd|xls|xlsx|docx|apk|ipa|flv|midi|mps|pls|pps|ppa|pwz|mp3|mp4|mpeg|mpe|asf|asx|mpg|3gp|mov|m4v|mkv|vob|vod|mod|ogg|rm|rmvb|wmv|avi|dat|exe|wps|js|css|bmp|jpg|png|gif|ico|tiff|jpeg|svg|webp|mpa|mdb|bin)$/iu', $url)) - { - return false; - } - - //增加 要排除的列表页特征正则 BY KEN - if ( ! empty(self::$configs['list_url_regexes_remove'])) - { - foreach (self::$configs['list_url_regexes_remove'] as $regex) - { - if (preg_match("#{$regex}#i", $url)) - { - return false; - } - } - } - - //增加无列表页选项,即所有页面都要抓取内容,包含列表页 - if (empty(self::$configs['list_url_regexes']) or self::$configs['list_url_regexes'][0] == 'x') - { - return false; - } - - //增加泛列表页,即所有页面都是列表页,只抓取链接,不抓取内容 - if (self::$configs['list_url_regexes'][0] == '*') - { - return true; - } - - if ( ! empty(self::$configs['list_url_regexes'])) - { - foreach (self::$configs['list_url_regexes'] as $regex) - { - if (preg_match("#{$regex}#i", $url)) - { - $result = true; - break; - } - } - } - return $result; - } - - /** - * 是否内容页面 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-10-12 19:06 - */ - public function is_content_page($url) - { - $result = false; - //过滤下载类型文件 20180209 - if (preg_match('/\.(zip|7z|cab|rar|iso|gho|jar|ace|tar|gz|bz2|z|xml|pdf|doc|txt|rtf|snd|xls|xlsx|docx|apk|ipa|flv|midi|mps|pls|pps|ppa|pwz|mp3|mp4|mpeg|mpe|asf|asx|mpg|3gp|mov|m4v|mkv|vob|vod|mod|ogg|rm|rmvb|wmv|avi|dat|exe|wps|js|css|bmp|jpg|png|gif|ico|tiff|jpeg|svg|webp|mpa|mdb|bin)$/iu', $url)) - { - return false; - } - - //增加 要排除的内容页特征正则 BY KEN - if ( ! empty(self::$configs['content_url_regexes_remove'])) - { - foreach (self::$configs['content_url_regexes_remove'] as $regex) - { - if (preg_match("#{$regex}#i", $url)) - { - return false; - } - } - } - - //增加泛内容模式,即所有页面都要提取内容 - if (empty(self::$configs['content_url_regexes']) or self::$configs['content_url_regexes'][0] == '*') - { - return true; - } - //无内容,泛列表模式,即所有页面都不提取内容 - if (self::$configs['content_url_regexes'][0] == 'x') - { - return false; - } - - if ( ! empty(self::$configs['content_url_regexes'])) - { - foreach (self::$configs['content_url_regexes'] as $regex) - { - if (preg_match("#{$regex}#i", $url)) - { - $result = true; - break; - } - } - } - return $result; - } - - /** - * Parse command. - * php yourfile.php start | stop | status | kill - * - * @return void - */ - public function parse_command() - { - // 检查运行命令的参数 - global $argv; - $start_file = $argv[0]; - - // 命令 - $command = isset($argv[1]) ? trim($argv[1]) : 'start'; - - // 子命令, 目前只支持-d - $command2 = isset($argv[2]) ? $argv[2] : ''; - - // 根据命令做相应处理 - switch($command) - { - // 启动 phpspider - case 'start': - if ($command2 === '-d') - { - self::$daemonize = true; - } - break; - case 'stop': - exec("ps aux | grep $start_file | grep -v grep | awk '{print $2}'", $info); - if (count($info) <= 1) - { - echo "PHPSpider[$start_file] not run\n"; - } - else - { - //echo "PHPSpider[$start_file] is stoping ...\n"; - echo "PHPSpider[$start_file] stop success"; - exec("ps aux | grep $start_file | grep -v grep | awk '{print $2}' |xargs kill -SIGINT", $info); - } - exit; - break; - case 'kill': - exec("ps aux | grep $start_file | grep -v grep | awk '{print $2}' |xargs kill -SIGKILL"); - break; - // 显示 phpspider 运行状态 - case 'status': - exit(0); - // 未知命令 - default : - exit("Usage: php yourfile.php {start|stop|status|kill}\n"); - } - } - - /** - * Signal hander. - * - * @param int $signal - */ - public function signal_handler($signal) - { - switch ($signal) - { - // Stop. - case SIGINT: - log::warn('Program stopping...'); - self::$terminate = true; - break; - // Show status. - case SIGUSR2: - echo "show status\n"; - break; - } - } - - /** - * Install signal handler. - * - * @return void - */ - public function install_signal() - { - if (function_exists('pcntl_signal')) - { - // stop - // static调用方式 - //pcntl_signal(SIGINT, array(__CLASS__, 'signal_handler'), false); - pcntl_signal(SIGINT, array(&$this, 'signal_handler'), false); - // status - pcntl_signal(SIGUSR2, array(&$this, 'signal_handler'), false); - // ignore - pcntl_signal(SIGPIPE, SIG_IGN, false); - } - } - - /** - * Run as deamon mode. - * - * @throws Exception - */ - protected static function daemonize() - { - if (!self::$daemonize) - { - return; - } - - // fork前一定要关闭redis - queue::clear_link(); - - umask(0); - $pid = pcntl_fork(); - if (-1 === $pid) - { - throw new Exception('fork fail'); - } - elseif ($pid > 0) - { - exit(0); - } - if (-1 === posix_setsid()) - { - throw new Exception('setsid fail'); - } - // Fork again avoid SVR4 system regain the control of terminal. - $pid = pcntl_fork(); - if (-1 === $pid) - { - throw new Exception('fork fail'); - } - elseif (0 !== $pid) - { - exit(0); - } - } - - /** - * 检查是否终止当前进程 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function check_terminate() - { - if (!self::$terminate) - { - return false; - } - - // 删除当前任务状态 - $this->del_task_status(self::$serverid, self::$taskid); - - if (self::$taskmaster) - { - // 检查子进程是否都退出 - while (true) - { - $all_stop = true; - for ($i = 2; $i <= self::$tasknum; $i++) - { - // 只要一个还活着就说明没有完全退出 - $task_status = $this->get_task_status(self::$serverid, $i); - if ($task_status) - { - $all_stop = false; - } - } - if ($all_stop) - { - break; - } - else - { - log::warn('Task stop waiting...'); - } - sleep(1); - } - - $this->del_server_list(self::$serverid); - - // 显示最后结果 - log::$log_show = true; - - $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start)); - log::note("Spider finished in {$spider_time_run}"); - - $get_collected_url_num = $this->get_collected_url_num(); - log::note("Total pages: {$get_collected_url_num} \n"); - } - exit(); - } - - public function start() - { - $this->parse_command(); - - // 爬虫开始时间 - self::$time_start = time(); - // 当前任务ID - self::$taskid = 1; - // 当前任务进程ID - self::$taskpid = function_exists('posix_getpid') ? posix_getpid() : 1; - self::$collect_succ = 0; - self::$collect_fail = 0; - - //-------------------------------------------------------------------------------- - // 运行前验证 - //-------------------------------------------------------------------------------- - - // 检查PHP版本 - if (version_compare(PHP_VERSION, '5.3.0', 'lt')) - { - log::error('PHP 5.3+ is required, currently installed version is: ' . phpversion()); - exit; - } - - // 检查CURL扩展 - if(!function_exists('curl_init')) - { - log::error('The curl extension was not found'); - exit; - } - - // 多任务需要pcntl扩展支持 - if (self::$tasknum > 1 && !function_exists('pcntl_fork')) - { - log::error('Multitasking needs pcntl, the pcntl extension was not found'); - exit; - } - - // 守护进程需要pcntl扩展支持 - if (self::$daemonize && !function_exists('pcntl_fork')) - { - log::error('Daemonize needs pcntl, the pcntl extension was not found'); - exit; - } - - // 集群、保存运行状态、多任务都需要Redis支持 - if ( self::$multiserver || self::$save_running_state || self::$tasknum > 1 ) - { - self::$use_redis = true; - - queue::set_connect('default', self::$queue_config); - if (!queue::init()) - { - if ( self::$multiserver ) - { - log::error('Multiserver needs Redis support, '.queue::$error); - exit; - } - - if ( self::$tasknum > 1 ) - { - log::error('Multitasking needs Redis support, '.queue::$error); - exit; - } - - if ( self::$save_running_state ) - { - log::error('Spider kept running state needs Redis support, '.queue::$error); - exit; - } - } - } - - // 检查导出 - $this->check_export(); - - // 检查缓存 - $this->check_cache(); - - // 检查 scan_urls - if (empty(self::$configs['scan_urls'])) - { - log::error('No scan url to start'); - exit; - } - - foreach ( self::$configs['scan_urls'] as $url ) - { - // 只检查配置中的入口URL, 通过 add_scan_url 添加的不检查了. - if (!$this->is_scan_page($url)) - { - log::error("Domain of scan_urls (\"{$url}\") does not match the domains of the domain name"); - exit; - } - } - - // windows 下没法显示面板, 强制显示日志 - if (util::is_win()) - { - self::$configs['name'] = iconv('UTF-8', 'GB2312//IGNORE', self::$configs['name']); - log::$log_show = true; - } - // 守护进程下也显示日志 - elseif (self::$daemonize) - { - log::$log_show = true; - } - else - { - log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false; - } - - if (log::$log_show) - { - global $argv; - $start_file = $argv[0]; - - $header = ''; - if ( ! util::is_win()) - { - $header .= "\033[33m"; - } - - $header .= "\n[ ".self::$configs['name']." Spider ] is started...\n\n"; - $header .= ' * PHPSpider Version: '.self::VERSION."\n"; - $header .= " * Documentation: https://doc.phpspider.org\n"; - $header .= ' * Task Number: '.self::$tasknum."\n\n"; - $header .= "Input \"php $start_file stop\" to quit. Start success.\n"; - if ( ! util::is_win()) - { - $header .= "\033[0m"; - } - - log::note($header); - } - - // 如果是守护进程,恢复日志状态 - //if (self::$daemonize) - //{ - //log::$log_show = isset(self::$configs['log_show']) ? self::$configs['log_show'] : false; - //} - - // 多任务和分布式都要清掉, 当然分布式只清自己的 - $this->init_redis(); - - //-------------------------------------------------------------------------------- - // 生成多任务 - //-------------------------------------------------------------------------------- - - // 添加入口URL到队列 - foreach ( self::$configs['scan_urls'] as $url ) - { - // false 表示不允许重复 - $this->add_scan_url($url, null, false); - } - - // 放这个位置, 可以添加入口页面 - if ($this->on_start) - { - call_user_func($this->on_start, $this); - } - - if (!self::$daemonize) - { - if (!log::$log_show) - { - // 第一次先清屏 - $this->clear_echo(); - - // 先显示一次面板, 然后下面再每次采集成功显示一次 - $this->display_ui(); - } - } - else - { - $this->daemonize(); - } - - // 安装信号 - $this->install_signal(); - - // 开始采集 - $this->do_collect_page(); - - // 从服务器列表中删除当前服务器信息 - $this->del_server_list(self::$serverid); - } - - /** - * 创建一个子进程 - * @param Worker $worker - * @throws Exception - */ - public function fork_one_task($taskid) - { - $pid = pcntl_fork(); - - // 主进程记录子进程pid - if($pid > 0) - { - // 暂时没用 - //self::$taskpids[$taskid] = $pid; - } - // 子进程运行 - elseif (0 === $pid) - { - log::warn("Fork children task({$taskid}) successful..."); - - // 初始化子进程参数 - self::$time_start = microtime(true); - self::$taskid = $taskid; - self::$taskmaster = false; - self::$taskpid = posix_getpid(); - self::$collect_succ = 0; - self::$collect_fail = 0; - - queue::set_connect('default', self::$queue_config); - queue::init(); - - //退出前计时,等待1分钟,如果获取不到新任务,再退出 - self::$stand_by_time = 0; - while (self::$stand_by_time < self::$configs['max_stand_by_time']) - { - $this->do_collect_page(); - log::warn('Task('.self::$taskid.') Stand By '.self::$stand_by_time.'/'.self::$configs['max_stand_by_time'].' s'); - self::$stand_by_time++; - sleep(1); - } - $queue_lsize = $this->queue_lsize(); - log::warn('Task('.self::$taskid.') exit : queue_lsize = '.$queue_lsize); - $this->del_task_status(self::$serverid, $taskid); - - // 这里用0表示子进程正常退出 - exit(0); - } - else - { - log::error("Fork children task({$taskid}) fail..."); - exit; - } - } - - public function do_collect_page() - { - while( $queue_lsize = $this->queue_lsize() ) - { - // 如果是主任务 - if (self::$taskmaster) - { - // 多任务下主任务未准备就绪 - if (self::$tasknum > 1 && !self::$fork_task_complete) - { - // 主进程采集到多于任务数2个时, 生成子任务一起采集 - if ($queue_lsize > self::$tasknum + 2) - { - self::$fork_task_complete = true; - - // fork 子进程前一定要先干掉redis连接fd, 不然会存在进程互抢redis fd 问题 - queue::clear_link(); - // task进程从2开始, 1被master进程所使用 - for ($i = 2; $i <= self::$tasknum; $i++) - { - $this->fork_one_task($i); - } - } - } - //在主进程中,保存当前配置到缓存,以使子进程可实时读取动态修改后的配置 20180209 - if (self::$use_redis and ! empty(self::$configs)) - { - queue::set('configs_'.self::$configs['name'], json_encode(self::$configs)); - } - // 抓取页面 - $this->collect_page(); - // 保存任务状态 - $this->set_task_status(); - - // 每采集成功一次页面, 就刷新一次面板 - if (!log::$log_show && !self::$daemonize) - { - $this->display_ui(); - } - } - // 如果是子任务 - else - { - // 主进程采集到多于任务数2个时, 子任务可以采集, 否则等待... - if ($queue_lsize > self::$taskid + 2) - { - //在子进程中,从内存中实时读取当前最新配置,用于适应主进程常驻内存模式,无限循环后的配置变动 20180209 - if (self::$use_redis and ! empty(self::$configs)) - { - if ($configs_active = queue::get('configs_'.self::$configs['name'])) - { - self::$configs = json_decode($configs_active, true); - } - } - // 抓取页面 - $this->collect_page(); - // 保存任务状态 - $this->set_task_status(); - } - else - { - log::warn('Task('.self::$taskid.') waiting...reason: queue_lsize = '.$queue_lsize.' < tasknum = '.self::$tasknum); - sleep(1); - } - } - - // 检查进程是否收到关闭信号 - $this->check_terminate(); - } - } - - /** - * 爬取页面 - * - * @param mixed $collect_url 要抓取的链接 - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function collect_page() - { - //减少非必要 queue_lsize 查询 20180214 - if (isset(self::$configs['log_type']) and strstr(self::$configs['log_type'], 'info')) - { - $get_collect_url_num = $this->get_collect_url_num(); - log::info('task id: '.self::$taskid." Find pages: {$get_collect_url_num} "); - - $queue_lsize = $this->queue_lsize(); - log::info('task id: '.self::$taskid." Waiting for collect pages: {$queue_lsize} "); - - $get_collected_url_num = $this->get_collected_url_num(); - log::info('task id: '.self::$taskid." Collected pages: {$get_collected_url_num} "); - - // 多任务的时候输出爬虫序号 - if (self::$tasknum > 1) - { - log::info('Current task id: '.self::$taskid); - } - } - //顺序提取任务,先进先出(当配置 queue_order = rand ,先进先出无效,都为随机提取任务) - $link = $this->queue_rpop(); - - if (empty($link)) - { - log::warn('Task('.self::$taskid.') Get Task link Fail...Stand By...'); - return false; - } - $link = $this->link_uncompress($link); - if (empty($link['url'])) - { - log::warn('Task('.self::$taskid.') Get Task url Fail...Stand By...'); - return false; - } - self::$stand_by_time = 0; //接到任务,则超时退出计时重置 - - $url = $link['url']; - - //限制单域名最大url数量 20180213 - if (isset(self::$configs['max_pages']) and self::$configs['max_pages'] > 0) - { - $domain_pages_num = $this->incr_pages_num($url); - if ($domain_pages_num > self::$configs['max_pages']) - { - log::debug('Task('.self::$taskid.') pages = '.$domain_pages_num.' more than '.self::$configs['max_pages'].", $url [Skip]"); - return false; - } - } - - //限制单域名最大花费时长 20180213 - if (isset(self::$configs['max_duration']) and self::$configs['max_duration'] > 0) - { - $domain_duration = $this->get_duration_num($url); - if ($domain_duration > self::$configs['max_duration']) - { - log::debug('Task('.self::$taskid.') duration = '.$domain_duration.' more than '.self::$configs['max_duration'].", $url [Skip]"); - return false; - } - } - - //当前 host 并发检测 2018-5 BY KEN - if (self::$configs['max_task_per_host'] > 0) - { - $task_per_host = $this->get_task_per_host_num($url); - if ($task_per_host < self::$configs['max_task_per_host']) - { - $task_per_host = $this->incr_task_per_host($url); - } - else - { - log::warn('Task('.self::$taskid.') task_per_host = '.$task_per_host.' > '.self::$configs['max_task_per_host'].' ; URL: '.$url.' will be retry later...'); - $this->queue_lpush($link); //放回队列 - usleep(100000); - return false; - } - } - - // 已采集页面数量 +1 - $this->incr_collected_url_num($url); - - // 爬取页面开始时间 - $page_time_start = microtime(true); - - // 下载页面前执行 - // 比如有时需要根据某个特定的URL,来决定这次的请求是否使用代理 / 或使用哪个代理 - if ($this->on_before_download_page) - { - $return = call_user_func($this->on_before_download_page, $url, $link, $this); - if (isset($return)) $link = $return; - } - - requests::$input_encoding = null; - $html = $this->request_url($url, $link); - - //记录速度较慢域名花费抓取时间 20180213 - $time_run = round(microtime(true) - $page_time_start); - if ($time_run > 1) - { - $this->incr_duration_num($url, $time_run); - } - - // 爬完页面开始处理时间 - $page_time_start = microtime(true); - - if (!$html) - { - return false; - } - // 当前正在爬取的网页页面的对象 - $page = array( - 'url' => $url, - 'raw' => $html, - 'request' => array( - 'url' => $url, - 'method' => $link['method'], - 'headers' => $link['headers'], - 'params' => $link['params'], - 'context_data' => $link['context_data'], - 'try_num' => $link['try_num'], - 'max_try' => $link['max_try'], - 'depth' => $link['depth'], - 'taskid' => self::$taskid, - ), - ); - //printf("memory usage: %.2f M\n", memory_get_usage() / 1024 / 1024 ); - unset($html); - - //-------------------------------------------------------------------------------- - // 处理回调函数 - //-------------------------------------------------------------------------------- - - // 判断当前网页是否被反爬虫了, 需要开发者实现 - if ($this->is_anti_spider) - { - $is_anti_spider = call_user_func($this->is_anti_spider, $url, $page['raw'], $this); - // 如果在回调函数里面判断被反爬虫并且返回true - if ($is_anti_spider) - { - return false; - } - } - - // 在一个网页下载完成之后调用. 主要用来对下载的网页进行处理. - // 比如下载了某个网页, 希望向网页的body中添加html标签 - if ($this->on_download_page) - { - $return = call_user_func($this->on_download_page, $page, $this); - // 针对那些老是忘记return的人 - if (isset($return)) - { - $page = $return; - } - unset($return); - } - - // 是否从当前页面分析提取URL - // 回调函数如果返回false表示不需要再从此网页中发现待爬url - $is_find_url = true; - if ($link['url_type'] == 'scan_page') - { - if ($this->on_scan_page) - { - $return = call_user_func($this->on_scan_page, $page, $page['raw'], $this); - if (isset($return)) - { - $is_find_url = $return; - } - - unset($return); - } - } - elseif ($link['url_type'] == 'content_page') - { - if ($this->on_content_page) - { - $return = call_user_func($this->on_content_page, $page, $page['raw'], $this); - if (isset($return)) - { - $is_find_url = $return; - } - unset($return); - } - } - elseif ($link['url_type'] == 'list_page') - { - if ($this->on_list_page) - { - $return = call_user_func($this->on_list_page, $page, $page['raw'], $this); - if (isset($return)) - { - $is_find_url = $return; - } - unset($return); - } - } - - // on_scan_page、on_list_page、on_content_page 返回false表示不需要再从此网页中发现待爬url - if ($is_find_url) - { - // 如果深度没有超过最大深度, 获取下一级URL - if (self::$configs['max_depth'] == 0 || $link['depth'] < self::$configs['max_depth']) - { - // 分析提取HTML页面中的URL - $this->get_urls($page['raw'], $url, $link['depth'] + 1); - } - } - - // 如果是内容页, 分析提取HTML页面中的字段 - // 列表页也可以提取数据的, source_type: urlcontext, 未实现 - if ($link['url_type'] == 'content_page') - { - $this->get_html_fields($page['raw'], $url, $page); - } - - // 如果当前深度大于缓存的, 更新缓存 - $this->incr_depth_num($link['depth']); - - // 处理页面耗时时间 - $time_run = round(microtime(true) - $page_time_start, 3); - log::debug('task id: '.self::$taskid." Success process page {$url} in {$time_run} s"); - - $spider_time_run = util::time2second(intval(microtime(true) - self::$time_start)); - log::info('task id: '.self::$taskid." Spider running in {$spider_time_run}"); - - // 爬虫爬取每个网页的时间间隔, 单位: 毫秒 - if (!isset(self::$configs['interval'])) - { - // 默认睡眠100毫秒, 太快了会被认为是ddos - self::$configs['interval'] = 100; - } - usleep(self::$configs['interval'] * 1000); - } - - /** - * 下载网页, 得到网页内容 - * - * @param mixed $url - * @param mixed $link - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function request_url($url, $link = array()) - { - $time_start = microtime(true); - - //$url = "http://www.qiushibaike.com/article/117568316"; - - // 设置了编码就不要让requests去判断了 - if (isset(self::$configs['input_encoding'])) - { - requests::$input_encoding = self::$configs['input_encoding']; - } - // 得到的编码如果不是utf-8的要转成utf-8, 因为xpath只支持utf-8 - requests::$output_encoding = 'utf-8'; - requests::set_timeout(self::$configs['timeout']); - requests::set_useragent(self::$configs['user_agent']); - - // 先删除伪造IP - requests::del_client_ip(); - // 是否设置了伪造IP - if (self::$configs['client_ip']) - { - requests::set_client_ip(self::$configs['client_ip']); - } - - // 先删除代理,免得前一个URL的代理被带过来了 - requests::del_proxy(); - // 是否设置了代理 - if ($link['proxy']) - { - requests::set_proxy($link['proxy']); - } - - // 如何设置了 HTTP Headers - if (!empty($link['headers'])) - { - foreach ($link['headers'] as $k=>$v) - { - requests::set_header($k, $v); - } - } - //限制 http 请求模式为 get 或 post - $method = trim(strtolower($link['method'])); - $method = ($method == 'post') ? 'post' : 'get'; - $params = empty($link['params']) ? array() : $link['params']; - $html = requests::$method($url, $params); - // 此url附加的数据不为空, 比如内容页需要列表页一些数据, 拼接到后面去 - if ($html && !empty($link['context_data'])) - { - $html .= $link['context_data']; - } - - $http_code = requests::$status_code; - - //请求完成 host 的并发计数减 1 2018-5 BY KEN - if (self::$configs['max_task_per_host'] > 0) - { - $this->incr_task_per_host($url, 'decr'); - } - - if ($this->on_status_code) - { - $return = call_user_func($this->on_status_code, $http_code, $url, $html, $this); - if (isset($return)) - { - $html = $return; - } - unset($return); - if ( ! $html) - { - return false; - } - } - - if ($http_code != 200) - { - // 如果是301、302跳转, 抓取跳转后的网页内容 - if ($http_code == 301 || $http_code == 302) - { - $info = requests::$info; - //if (isset($info['redirect_url'])) - if (!empty($info['redirect_url'])) - { - $url = $info['redirect_url']; - requests::$input_encoding = null; - $method = empty($link['method']) ? 'get' : strtolower($link['method']); - $params = empty($link['params']) ? array() : $link['params']; - $html = requests::$method($url, $params); - // 有跳转的就直接获取就好,不要调用自己,容易进入死循环 - //$html = $this->request_url($url, $link); - if ($html && !empty($link['context_data'])) - { - $html .= $link['context_data']; - } - } - else - { - return false; - } - } - else - { - if ( ! empty(self::$configs['max_try']) and $http_code == 407) - { - // 扔到队列头部去, 继续采集 - $this->queue_rpush($link); - log::error("Failed to download page {$url}"); - self::$collect_fail++; - } - elseif ( ! empty(self::$configs['max_try']) and in_array($http_code, array('0', '502', '503', '429'))) - { - // 采集次数加一 - $link['try_num']++; - // 抓取次数 小于 允许抓取失败次数 - if ( $link['try_num'] <= $link['max_try'] ) - { - // 扔到队列头部去, 继续采集 - $this->queue_rpush($link); - } - log::error("Failed to download page {$url}, retry({$link['try_num']})"); - } - else - { - log::error("Failed to download page {$url}"); - self::$collect_fail++; - } - log::error("HTTP CODE: {$http_code}"); - return false; - } - } - - // 爬取页面耗时时间 - $time_run = round(microtime(true) - $time_start, 3); - log::debug("Success download page {$url} in {$time_run} s"); - self::$collect_succ++; - - return $html; - } - - /** - * 分析提取HTML页面中的URL - * - * @param mixed $html HTML内容 - * @param mixed $collect_url 抓取的URL, 用来拼凑完整页面的URL - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_urls($html, $collect_url, $depth = 0) - { - //-------------------------------------------------------------------------------- - // 正则匹配出页面中的URL - //-------------------------------------------------------------------------------- - $urls = selector::select($html, '//a/@href'); - //preg_match_all("/ \r\n\t]{1,}/isU", $html, $matchs); - //$urls = array(); - //if (!empty($matchs[1])) - //{ - //foreach ($matchs[1] as $url) - //{ - //$urls[] = str_replace(array("\"", "'",'&'), array("",'','&'), $url); - //} - //} - - if (empty($urls)) - { - return false; - } - - // 如果页面上只有一个url,要把他转为数组,否则下面会报警告 - if (!is_array($urls)) - { - $urls = array($urls); - } - - foreach ($urls as $key=>$url) - { - //限制最大子域名数量 - if ( ! empty(self::$configs['max_sub_num'])) - { - //抓取子域名超过超过指定值,就丢掉 - $sub_domain_count = $this->sub_domain_count($url); - if ($sub_domain_count > self::$configs['max_sub_num']) - { - unset($urls[$key]); - log::debug('Task('.self::$taskid.') subdomin = '.$sub_domain_count.' more than '.self::$configs['max_sub_num'].",get_urls $url [Skip]"); - continue; - } - } - $urls[$key] = str_replace(array('"', "'", '&'), array('', '', '&'), $url); - } - - //-------------------------------------------------------------------------------- - // 过滤和拼凑URL - //-------------------------------------------------------------------------------- - // 去除重复的URL - $urls = array_unique($urls); - foreach ($urls as $k=>$url) - { - $url = trim($url); - if (empty($url)) - { - continue; - } - - $val = $this->fill_url($url, $collect_url); - - //限制单域名最大url数量 20180213 - if ($val and isset(self::$configs['max_pages']) and self::$configs['max_pages'] > 0) - { - $domain_pages_num = $this->incr_pages_num($val); - if ($domain_pages_num > self::$configs['max_pages']) - { - continue; - } - } - - if ($val) - { - $urls[$k] = $val; - } - else - { - unset($urls[$k]); - } - } - - if (empty($urls)) - { - return false; - } - - //-------------------------------------------------------------------------------- - // 把抓取到的URL放入队列 - //-------------------------------------------------------------------------------- - foreach ($urls as $url) - { - if ($this->on_fetch_url) - { - $return = call_user_func($this->on_fetch_url, $url, $this); - $url = isset($return) ? $return : $url; - unset($return); - - // 如果 on_fetch_url 返回 false,此URL不入队列 - if (!$url) - { - continue; - } - } - - // 把当前页当做找到的url的Referer页 - $options = array( - 'headers' => array( - 'Referer' => $collect_url, - ) - ); - $this->add_url($url, $options, $depth); - } - } - - /** - * 获得完整的连接地址 - * - * @param mixed $url 要检查的URL - * @param mixed $collect_url 从那个URL页面得到上面的URL - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function fill_url($url, $collect_url) - { - $url = trim($url); - $collect_url = trim($collect_url); - - // 排除JavaScript的连接 - //if (strpos($url, "javascript:") !== false) - if (preg_match("@^(mailto|javascript:|#|'|\")@i", $url) || $url == '') - { - return false; - } - // 排除没有被解析成功的语言标签 - if (substr($url, 0, 3) == '<%=' or substr($url, 0, 1) == '{' or substr($url, 0, 2) == ' {') - // if(substr($url, 0, 3) == '<%=') - { - return false; - } - - $parse_url = @parse_url($collect_url); - if (empty($parse_url['scheme']) || empty($parse_url['host'])) - { - return false; - } - // 过滤mailto、tel、sms、wechat、sinaweibo、weixin等协议 - if ( ! in_array($parse_url['scheme'], array('http', 'https'))) - { - return false; - } - $scheme = $parse_url['scheme']; - $domain = $parse_url['host']; - $path = empty($parse_url['path']) ? '' : $parse_url['path']; - $base_url_path = $domain.$path; - $base_url_path = preg_replace("/\/([^\/]*)\.(.*)$/", '/', $base_url_path); - $base_url_path = preg_replace("/\/$/", '', $base_url_path); - $i = $path_step = 0; - $dstr = $pstr = ''; - $pos = strpos($url, '#'); - if ($pos > 0) - { - // 去掉#和后面的字符串 - $url = substr($url, 0, $pos); - } - - // 修正url格式为 //www.jd.com/111.html 为正确的http - if (substr($url, 0, 2) == '//') - { - $url = preg_replace('/^\/\//iu', '', $url); - } - // /1234.html - elseif($url[0] == '/') - { - $url = $domain.$url; - } - // ./1234.html、../1234.html 这种类型的 - elseif($url[0] == '.') - { - if(!isset($url[2])) - { - return false; - } - else - { - $urls = explode('/',$url); - foreach($urls as $u) - { - if( $u == '..' ) - { - $path_step++; - } - // 遇到 ., 不知道为什么不直接写$u == '.', 貌似一样的 - else if( $i < count($urls)-1 ) - { - $dstr .= $urls[$i].'/'; - } - else - { - $dstr .= $urls[$i]; - } - $i++; - } - $urls = explode('/',$base_url_path); - if(count($urls) <= $path_step) - { - return false; - } - else - { - $pstr = ''; - for($i=0;$i - * @created time :2016-11-05 18:58 - */ - public function link_compress($link) - { - if (empty($link['url_type'])) - { - unset($link['url_type']); - } - - if (empty($link['method']) || strtolower($link['method']) == 'get') - { - unset($link['method']); - } - - if (empty($link['headers'])) - { - unset($link['headers']); - } - - if (empty($link['params'])) - { - unset($link['params']); - } - - if (empty($link['context_data'])) - { - unset($link['context_data']); - } - - if (empty($link['proxy'])) - { - unset($link['proxy']); - } - - if (empty($link['try_num'])) - { - unset($link['try_num']); - } - - if (empty($link['max_try'])) - { - unset($link['max_try']); - } - - if (empty($link['depth'])) - { - unset($link['depth']); - } - //$json = json_encode($link); - //$json = gzdeflate($json); - return $link; - } - - /** - * 连接对象解压缩 - * - * @param mixed $link - * @return void - * @author seatle - * @created time :2016-11-05 18:58 - */ - public function link_uncompress($link) - { - $link = array( - 'url' => isset($link['url']) ? $link['url'] : '', - 'url_type' => isset($link['url_type']) ? $link['url_type'] : '', - 'method' => isset($link['method']) ? $link['method'] : 'get', - 'headers' => isset($link['headers']) ? $link['headers'] : array(), - 'params' => isset($link['params']) ? $link['params'] : array(), - 'context_data' => isset($link['context_data']) ? $link['context_data'] : '', - 'proxy' => isset($link['proxy']) ? $link['proxy'] : self::$configs['proxy'], - 'try_num' => isset($link['try_num']) ? $link['try_num'] : 0, - 'max_try' => isset($link['max_try']) ? $link['max_try'] : self::$configs['max_try'], - 'depth' => isset($link['depth']) ? $link['depth'] : 0, - ); - - return $link; - } - - /** - * 分析提取HTML页面中的字段 - * - * @param mixed $html - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_html_fields($html, $url, $page) - { - $fields = $this->get_fields(self::$configs['fields'], $html, $url, $page); - - if (!empty($fields)) - { - if ($this->on_extract_page) - { - $return = call_user_func($this->on_extract_page, $page, $fields); - if (!isset($return)) - { - log::warn("on_extract_page return value can't be empty"); - } - // 返回false,跳过当前页面,内容不入库 - elseif ($return === false) - { - return false; - } - elseif (!is_array($return)) - { - log::warn('on_extract_page return value must be an array'); - } - else - { - $fields = $return; - } - } - - if (isset($fields) && is_array($fields)) - { - $fields_num = $this->incr_fields_num(); - if (self::$configs['max_fields'] != 0 && $fields_num > self::$configs['max_fields']) - { - exit(0); - } - - if (version_compare(PHP_VERSION,'5.4.0','<')) - { - $fields_str = json_encode($fields); - $fields_str = preg_replace_callback("#\\\u([0-9a-f]{4})#i", function ($matchs) - { - return @iconv('UCS-2BE', 'UTF-8', pack('H4', $matchs[1])); - }, $fields_str); - } - else - { - $fields_str = json_encode($fields, JSON_UNESCAPED_UNICODE); - } - - if (util::is_win()) - { - $fields_str = mb_convert_encoding($fields_str, 'gb2312', 'utf-8'); - } - log::info("Result[{$fields_num}]: ".$fields_str); - - // 如果设置了导出选项 - if (!empty(self::$configs['export'])) - { - self::$export_type = isset(self::$configs['export']['type']) ? self::$configs['export']['type'] : ''; - if (self::$export_type == 'csv') - { - util::put_file(self::$export_file, util::format_csv($fields)."\n", FILE_APPEND); - } - elseif (self::$export_type == 'sql') - { - $sql = db::insert(self::$export_table, $fields, true); - util::put_file(self::$export_file, $sql.";\n", FILE_APPEND); - } - elseif (self::$export_type == 'db') - { - db::insert(self::$export_table, $fields); - } - } - } - } - } - - /** - * 根据配置提取HTML代码块中的字段 - * - * @param mixed $confs - * @param mixed $html - * @param mixed $page - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_fields($confs, $html, $url, $page) - { - $fields = array(); - foreach ($confs as $conf) - { - // 当前field抽取到的内容是否是有多项 - $repeated = isset($conf['repeated']) && $conf['repeated'] ? true : false; - // 当前field抽取到的内容是否必须有值 - $required = isset($conf['required']) && $conf['required'] ? true : false; - - if (empty($conf['name'])) - { - log::error("The field name is null, please check your \"fields\" and add the name of the field\n"); - exit; - } - - $values = NULL; - // 如果定义抽取规则 - if (!empty($conf['selector'])) - { - // 如果这个field是上一个field的附带连接 - if (isset($conf['source_type']) && $conf['source_type']=='attached_url') - { - // 取出上个field的内容作为连接, 内容分页是不进队列直接下载网页的 - if (!empty($fields[$conf['attached_url']])) - { - $collect_url = $this->fill_url($fields[$conf['attached_url']], $url); - log::debug("Find attached content page: {$collect_url}"); - $link['url'] = $collect_url; - $link = $this->link_uncompress($link); - requests::$input_encoding = null; - $html = $this->request_url($collect_url, $link); - // 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理. - if ($this->on_download_attached_page) - { - $return = call_user_func($this->on_download_attached_page, $html, $this); - if (isset($return)) - { - $html = $return; - } - } - - // 请求获取完分页数据后把连接删除了 - unset($fields[$conf['attached_url']]); - } - } - - // 没有设置抽取规则的类型 或者 设置为 xpath - if (!isset($conf['selector_type']) || $conf['selector_type']=='xpath') - { - // 如果找不到,返回的是false - $values = $this->get_fields_xpath($html, $conf['selector'], $conf['name']); - } - elseif ($conf['selector_type']=='css') - { - $values = $this->get_fields_css($html, $conf['selector'], $conf['name']); - } - elseif ($conf['selector_type']=='regex') - { - $values = $this->get_fields_regex($html, $conf['selector'], $conf['name']); - } - - // field不为空而且存在子配置 - if (isset($values) && !empty($conf['children'])) - { - // 如果提取到的结果是字符串,就转为数组,方便下面统一foreach - if (!is_array($values)) - { - $values = array($values); - } - $child_values = array(); - // 父项抽取到的html作为子项的提取内容 - foreach ($values as $child_html) - { - // 递归调用本方法, 所以多少子项目都支持 - $child_value = $this->get_fields($conf['children'], $child_html, $url, $page); - if (!empty($child_value)) - { - $child_values[] = $child_value; - } - } - // 有子项就存子项的数组, 没有就存HTML代码块 - if (!empty($child_values)) - { - $values = $child_values; - } - } - } - - if (!isset($values)) - { - // 如果值为空而且值设置为必须项, 跳出foreach循环 - if ($required) - { - log::warn("Selector {$conf['name']}[{$conf['selector']}] not found, It's a must"); - // 清空整个 fields,当前页面就等于略过了 - $fields = array(); - break; - } - // 避免内容分页时attached_url拼接时候string + array了 - $fields[$conf['name']] = ''; - //$fields[$conf['name']] = array(); - } - else - { - if (is_array($values)) - { - if ($repeated) - { - $fields[$conf['name']] = $values; - } - else - { - $fields[$conf['name']] = $values[0]; - } - } - else - { - $fields[$conf['name']] = $values; - } - // 不重复抽取则只取第一个元素 - //$fields[$conf['name']] = $repeated ? $values : $values[0]; - } - } - - if (!empty($fields)) - { - foreach ($fields as $fieldname => $data) - { - $pattern = "/ \r\n\t]{1,}/isu"; - /*$pattern = "//i"; */ - // 在抽取到field内容之后调用, 对其中包含的img标签进行回调处理 - if ($this->on_handle_img && preg_match($pattern, $data)) - { - $return = call_user_func($this->on_handle_img, $fieldname, $data); - if (!isset($return)) - { - log::warn("on_handle_img return value can't be empty\n"); - } - else - { - // 有数据才会执行 on_handle_img 方法, 所以这里不要被替换没了 - $data = $return; - } - } - - // 当一个field的内容被抽取到后进行的回调, 在此回调中可以对网页中抽取的内容作进一步处理 - if ($this->on_extract_field) - { - $return = call_user_func($this->on_extract_field, $fieldname, $data, $page); - if (!isset($return)) - { - log::warn("on_extract_field return value can't be empty\n"); - } - else - { - // 有数据才会执行 on_extract_field 方法, 所以这里不要被替换没了 - $fields[$fieldname] = $return; - } - } - } - } - - return $fields; - } - - /** - * 验证导出 - * - * @return void - * @author seatle - * @created time :2016-10-02 23:37 - */ - public function check_export() - { - // 如果设置了导出选项 - if (!empty(self::$configs['export'])) - { - if (self::$export_type == 'csv') - { - if (empty(self::$export_file)) - { - log::error('Export data into CSV files need to Set the file path.'); - exit; - } - } - elseif (self::$export_type == 'sql') - { - if (empty(self::$export_file)) - { - log::error('Export data into SQL files need to Set the file path.'); - exit; - } - } - elseif (self::$export_type == 'db') - { - if (!function_exists('mysqli_connect')) - { - log::error('Export data to a database need Mysql support, unable to load mysqli extension.'); - exit; - } - - if (empty(self::$db_config)) - { - log::error('Export data to a database need Mysql support, you have not set a config array for connect.'); - exit; - } - - $config = self::$db_config; - @mysqli_connect($config['host'], $config['user'], $config['pass'], $config['name'], $config['port']); - if(mysqli_connect_errno()) - { - log::error('Export data to a database need Mysql support, '.mysqli_connect_error()); - exit; - } - - db::set_connect('default', $config); - db::_init(); - - if (!db::table_exists(self::$export_table)) - { - log::error('Table '.self::$export_table.' does not exist'); - exit; - } - } - } - } - - public function check_cache() - { - if ( !self::$use_redis || self::$save_running_state) - { - return false; - } - - // 这个位置要改 - //$keys = queue::keys("*"); - //$count = count($keys); - // 直接检查db,清空的时候整个db清空,所以注意db不要跟其他项目混用 - $count = queue::dbsize(); - if ( $count > 0 ) - { - // After this operation, 4,318 kB of additional disk space will be used. - // Do you want to continue? [Y/n] - //$msg = "发现Redis中有采集数据, 是否继续执行, 不继续则清空Redis数据重新采集\n"; - $msg = "Found that the data of Redis, no continue will empty Redis data start again\n"; - $msg .= 'Do you want to continue? [Y/n]'; - fwrite(STDOUT, $msg); - $arg = strtolower(trim(fgets(STDIN))); - $arg = empty($arg) || !in_array($arg, array('Y', 'N', 'y','n')) ? 'y' : strtolower($arg); - if ($arg == 'n') - { - log::warn('Clear redis data...'); - queue::flushdb(); - // 下面这种性能太差了 - //foreach ($keys as $key) - //{ - //$key = str_replace(self::$queue_config['prefix'].':', '', $key); - //queue::del($key); - //} - } - } - } - - public function init_redis() - { - if (!self::$use_redis) - { - return false; - } - - // 添加当前服务器到服务器列表 - $this->add_server_list(self::$serverid, self::$tasknum); - - // 删除当前服务器的任务状态 - // 对于被强制退出的进程有用 - for ($i = 1; $i <= self::$tasknum; $i++) - { - $this->del_task_status(self::$serverid, $i); - } - } - - /** - * 设置任务状态, 主进程和子进程每成功采集一个页面后调用 - * - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public function set_task_status() - { - // 每采集成功一个页面, 生成当前进程状态到文件, 供主进程使用 - $mem = round(memory_get_usage(true)/(1024*1024),2); - $use_time = microtime(true) - self::$time_start; - $speed = round((self::$collect_succ + self::$collect_fail) / $use_time, 2); - $status = array( - 'id' => self::$taskid, - 'pid' => self::$taskpid, - 'mem' => $mem, - 'collect_succ' => self::$collect_succ, - 'collect_fail' => self::$collect_fail, - 'speed' => $speed, - ); - $task_status = json_encode($status); - - if (self::$use_redis) - { - $key = 'server-'.self::$serverid.'-task_status-'.self::$taskid; - queue::set($key, $task_status); - } - else - { - self::$task_status = array($task_status); - } - } - - /** - * 删除任务状态 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function del_task_status($serverid, $taskid) - { - if (!self::$use_redis) - { - return false; - } - $key = "server-{$serverid}-task_status-{$taskid}"; - queue::del($key); - } - - /** - * 获得任务状态, 主进程才会调用 - * - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public function get_task_status($serverid, $taskid) - { - if (!self::$use_redis) - { - return false; - } - - $key = "server-{$serverid}-task_status-{$taskid}"; - $task_status = queue::get($key); - return $task_status; - } - - /** - * 获得任务状态, 主进程才会调用 - * - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public function get_task_status_list($serverid = 1, $tasknum) - { - $task_status = array(); - if (self::$use_redis) - { - for ($i = 1; $i <= $tasknum; $i++) - { - $key = "server-{$serverid}-task_status-".$i; - $task_status[] = queue::get($key); - } - } - else - { - $task_status = self::$task_status; - } - return $task_status; - } - - /** - * 添加当前服务器信息到服务器列表 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function add_server_list($serverid, $tasknum) - { - if (!self::$use_redis) - { - return false; - } - - // 更新服务器列表 - $server_list_json = queue::get('server_list'); - $server_list = array(); - if ( ! $server_list_json) - { - $server_list[$serverid] = array( - 'serverid' => $serverid, - 'tasknum' => $tasknum, - 'time' => time(), - ); - } - else - { - $server_list = json_decode($server_list_json, true); - $server_list[$serverid] = array( - 'serverid' => $serverid, - 'tasknum' => $tasknum, - 'time' => time(), - ); - ksort($server_list); - } - queue::set('server_list', json_encode($server_list)); - } - - /** - * 从服务器列表中删除当前服务器信息 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function del_server_list($serverid) - { - if (!self::$use_redis) - { - return false; - } - - $server_list_json = queue::get('server_list'); - $server_list = array(); - if ($server_list_json) - { - $server_list = json_decode($server_list_json, true); - if (isset($server_list[$serverid])) - { - unset($server_list[$serverid]); - } - - // 删除完当前的任务列表如果还存在,就更新一下Redis - if (!empty($server_list)) - { - ksort($server_list); - queue::set('server_list', json_encode($server_list)); - } - } - } - - /** - * 获取等待爬取页面数量 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_collect_url_num() - { - if (self::$use_redis) - { - $count = queue::get('collect_urls_num'); - } - else - { - $count = self::$collect_urls_num; - } - return $count; - } - - /** - * 获取已经爬取页面数量 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_collected_url_num() - { - if (self::$use_redis) - { - $count = queue::get('collected_urls_num'); - } - else - { - $count = self::$collected_urls_num; - } - return $count; - } - - /** - * 已采集页面数量加一 - * - * @param mixed $url - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function incr_collected_url_num($url) - { - if (self::$use_redis) - { - queue::incr('collected_urls_num'); - } - else - { - self::$collected_urls_num++; - } - } - - /** - * 从队列左边插入 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_lpush($link = array(), $allowed_repeat = false) - { - if (empty($link) || empty($link['url'])) - { - return false; - } - - $url = $link['url']; - $link = $this->link_compress($link); - - $status = false; - if (self::$use_redis) - { - $key = 'collect_urls-'.md5($url); - $lock = 'lock-'.$key; - // 加锁: 一个进程一个进程轮流处理 - if (queue::lock($lock)) - { - $exists = queue::exists($key); - // 不存在或者当然URL可重复入 - if (!$exists || $allowed_repeat) - { - // 待爬取网页记录数加一 - queue::incr('collect_urls_num'); - // 先标记为待爬取网页 - queue::set($key, time()); - // 入队列 - $link = json_encode($link); - //根据采集设置为顺序采集还是随机采集,使用列表或集合对象 2018-5 BY KEN - if (self::$configs['queue_order'] == 'rand') - { - queue::sadd('collect_queue', $link); - } - else - { - queue::lpush('collect_queue', $link); - } - $status = true; - } - // 解锁 - queue::unlock($lock); - } - } - else - { - $key = md5($url); - if (!array_key_exists($key, self::$collect_urls)) - { - self::$collect_urls_num++; - self::$collect_urls[$key] = time(); - array_push(self::$collect_queue, $link); - $status = true; - } - } - return $status; - } - - /** - * 从队列右边插入 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_rpush($link = array(), $allowed_repeat = false) - { - if (empty($link) || empty($link['url'])) - { - return false; - } - - $url = $link['url']; - - $status = false; - if (self::$use_redis) - { - $key = 'collect_urls-'.md5($url); - $lock = 'lock-'.$key; - // 加锁: 一个进程一个进程轮流处理 - if (queue::lock($lock)) - { - $exists = queue::exists($key); - // 不存在或者当然URL可重复入 - if ( ! $exists || $allowed_repeat) - { - // 待爬取网页记录数加一 - queue::incr('collect_urls_num'); - // 先标记为待爬取网页 - queue::set($key, time()); - // 入队列 - $link = json_encode($link); - //根据采集设置为顺序采集还是随机采集,使用列表或集合对象 2018-5 BY KEN - if (self::$configs['queue_order'] == 'rand') - { - queue::sadd('collect_queue', $link); //无序集合 - } - else - { - queue::rpush('collect_queue', $link); //有序列表 - } - $status = true; - } - // 解锁 - queue::unlock($lock); - } - } - else - { - $key = md5($url); - if (!array_key_exists($key, self::$collect_urls)) - { - self::$collect_urls_num++; - self::$collect_urls[$key] = time(); - array_unshift(self::$collect_queue, $link); - $status = true; - } - } - return $status; - } - - /** - * 从队列左边取出 - * 后进先出 - * 可以避免采集内容页有分页的时候采集失败数据拼凑不全 - * 还可以按顺序采集列表页 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_lpop() - { - if (self::$use_redis) - { - //根据采集设置为顺序采集还是随机采集,使用列表或集合对象 - if (self::$configs['queue_order'] == 'rand') - { - $link = queue::spop('collect_queue'); - } - else - { - $link = queue::lpop('collect_queue'); - } - $link = json_decode($link, true); - } - else - { - $link = array_pop(self::$collect_queue); - } - return $link; - } - - /** - * 从队列右边取出 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_rpop() - { - if (self::$use_redis) - { - //根据采集设置为顺序采集还是随机采集,使用列表或集合对象 - if (self::$configs['queue_order'] == 'rand') - { - $link = queue::spop('collect_queue'); - } - else - { - $link = queue::rpop('collect_queue'); - } - $link = json_decode($link, true); - } - else - { - $link = array_shift(self::$collect_queue); - } - return $link; - } - - /** - * 队列长度 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function queue_lsize() - { - if (self::$use_redis) - { - //根据采集设置为顺序采集还是随机采集,使用列表或集合对象 - if (self::$configs['queue_order'] == 'rand') - { - $lsize = queue::scard('collect_queue'); - } - else - { - $lsize = queue::lsize('collect_queue'); - } - } - else - { - $lsize = count(self::$collect_queue); - } - return $lsize; - } - - /** - * 采集深度加一 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function incr_depth_num($depth) - { - if (self::$use_redis) - { - $lock = 'lock-depth_num'; - // 锁2秒 - if (queue::lock($lock, time(), 2)) - { - if (queue::get('depth_num') < $depth) - { - queue::set('depth_num', $depth); - } - - queue::unlock($lock); - } - } - else - { - if (self::$depth_num < $depth) - { - self::$depth_num = $depth; - } - } - } - - /** - * 获得采集深度 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_depth_num() - { - if (self::$use_redis) - { - $depth_num = queue::get('depth_num'); - return $depth_num ? $depth_num : 0; - } - else - { - return self::$depth_num; - } - } - - /** - * 提取到的field数目加一 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function incr_fields_num() - { - if (self::$use_redis) - { - $fields_num = queue::incr('fields_num'); - } - else - { - self::$fields_num++; - $fields_num = self::$fields_num; - } - return $fields_num; - } - - /** - * 提取到的field数目 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - public function get_fields_num() - { - if (self::$use_redis) - { - $fields_num = queue::get('fields_num'); - } - else - { - $fields_num = self::$fields_num; - } - return $fields_num ? $fields_num : 0; - } - - /** - * 提取到的pages数目加一,用于限制单域名采集页数上限 - * - * @return void - * @author KEN - * @created time :2018-05 - */ - public function incr_pages_num($url = '') - { - if ( ! empty($url)) - { - $domain = $this->getRootDomain($url, 'host'); - } - if (empty($domain)) - { - $domain = 'all'; - } - if (self::$use_redis) - { - $pages_num[$domain] = queue::incr('pages_num:'.$domain); - } - else - { - if (empty(self::$pages_num[$domain])) - { - self::$pages_num[$domain] = 1; - } - else - { - self::$pages_num[$domain]++; - } - $pages_num[$domain] = self::$pages_num[$domain]; - } - return $pages_num[$domain]; - } - - /** - * 超过1秒的慢速采集时间计数,用于限制单域名总采集时间上限 - * - * @return void - * @author KEN - * @created time :2018-05 - */ - public function incr_duration_num($url = '', $time_run = 1) - { - if ( ! empty($url)) - { - $domain = $this->getRootDomain($url); - } - if (empty($domain)) - { - $domain = 'all'; - } - if (self::$use_redis) - { - $duration[$domain] = queue::incr('duration:'.$domain, $time_run); - } - else - { - if (empty(self::$duration[$domain])) - { - self::$duration[$domain] = $time_run; - } - else - { - self::$duration[$domain] += $time_run; - } - $duration[$domain] = self::$duration[$domain]; - } - return $duration[$domain]; - } - - /** - * 读取单域名总慢速采集(响应超过1秒)的时间 - * - * @return void - * @author KEN - * @created time :2018-04 - */ - public function get_duration_num($url = '') - { - if ( ! empty($url)) - { - $domain = $this->getRootDomain($url); - } - if (empty($domain)) - { - $domain = 'all'; - } - if (self::$use_redis) - { - $duration[$domain] = queue::get('duration:'.$domain); - } - else - { - $duration[$domain] = ! empty(self::$duration[$domain]) ? self::$duration[$domain] : 0; - } - return $duration[$domain] ? $duration[$domain] : 0; - } - - /** - * 单 host 当前并发计数 - * @return int - * @author KEN - * @created time :2018-05-28 16:40 - */ - public function incr_task_per_host($url = '', $type = 'incr') - { - if (empty($url)) - { - return false; - } - $domain = $this->getRootDomain($url, 'host'); - if (empty($domain)) - { - return false; - } - if (self::$use_redis) - { - if ($type == 'decr') - { - $task_per_host_counter[$domain] = queue::decr('task_per_host:'.$domain); - } - else - { - $task_per_host_counter[$domain] = queue::incr('task_per_host:'.$domain); - } - } - else - { - - if (empty(self::$task_per_host_counter[$domain])) - { - self::$task_per_host_counter[$domain] = 1; - } - else - { - if ($type == 'decr') - { - self::$task_per_host_counter[$domain]--; - } - else - { - self::$task_per_host_counter[$domain]++; - } - } - $task_per_host_counter[$domain] = self::$task_per_host_counter[$domain]; - } - return $task_per_host_counter[$domain]; - } - - //获取url所属 host 当前并发数量 KEN - public function get_task_per_host_num($url) - { - if (empty($url)) - { - return 0; - } - $domain = $this->getRootDomain($url, 'host'); - if (empty($domain)) - { - return 0; - } - if (self::$use_redis) - { - $count = queue::get('task_per_host:'.$domain); - } - else - { - $count = self::$task_per_host_counter[$domain]; - } - return $count; - } - - /** - * 采用xpath分析提取字段 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_fields_xpath($html, $selector, $fieldname) - { - $result = selector::select($html, $selector); - if (selector::$error) - { - log::error("Field(\"{$fieldname}\") ".selector::$error."\n"); - } - return $result; - } - - /** - * 采用正则分析提取字段 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_fields_regex($html, $selector, $fieldname) - { - $result = selector::select($html, $selector, 'regex'); - if (selector::$error) - { - log::error("Field(\"{$fieldname}\") ".selector::$error."\n"); - } - return $result; - } - - /** - * 采用CSS选择器提取字段 - * - * @param mixed $html - * @param mixed $selector - * @param mixed $fieldname - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function get_fields_css($html, $selector, $fieldname) - { - $result = selector::select($html, $selector, 'css'); - if (selector::$error) - { - log::error("Field(\"{$fieldname}\") ".selector::$error."\n"); - } - return $result; - } - - /** - * 清空shell输出内容 - * - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function clear_echo() - { - $arr = array(27, 91, 72, 27, 91, 50, 74); - foreach ($arr as $a) - { - print chr($a); - } - //array_map(create_function('$a', 'print chr($a);'), array(27, 91, 72, 27, 91, 50, 74)); - } - - /** - * 替换shell输出内容 - * - * @param mixed $message - * @param mixed $force_clear_lines - * @return void - * @author seatle - * @created time :2016-11-16 11:06 - */ - public function replace_echo($message, $force_clear_lines = NULL) - { - static $last_lines = 0; - - if(!is_null($force_clear_lines)) - { - $last_lines = $force_clear_lines; - } - - // 获取终端宽度 - $toss = $status = null; - $term_width = exec('tput cols', $toss, $status); - if($status || empty($term_width)) - { - $term_width = 64; // Arbitrary fall-back term width. - } - - $line_count = 0; - foreach(explode("\n", $message) as $line) - { - $line_count += count(str_split($line, $term_width)); - } - - // Erasure MAGIC: Clear as many lines as the last output had. - for($i = 0; $i < $last_lines; $i++) - { - // Return to the beginning of the line - echo "\r"; - // Erase to the end of the line - echo "\033[K"; - // Move cursor Up a line - echo "\033[1A"; - // Return to the beginning of the line - echo "\r"; - // Erase to the end of the line - echo "\033[K"; - // Return to the beginning of the line - echo "\r"; - // Can be consolodated into - // echo "\r\033[K\033[1A\r\033[K\r"; - } - - $last_lines = $line_count; - - echo $message."\n"; - } - - /** - * 展示启动界面, Windows 不会到这里来 - * @return void - */ - public function display_ui() - { - $loadavg = sys_getloadavg(); - foreach ($loadavg as $k=>$v) - { - $loadavg[$k] = round($v, 2); - } - $display_str = "\033[1A\n\033[K-----------------------------\033[47;30m PHPSPIDER \033[0m-----------------------------\n\033[0m"; - //$display_str = "-----------------------------\033[47;30m PHPSPIDER \033[0m-----------------------------\n\033[0m"; - $run_time_str = util::time2second(time() - self::$time_start, false); - $display_str .= 'PHPSpider version:'.self::VERSION.' PHP version:'.PHP_VERSION."\n"; - $display_str .= 'start time:'.date('Y-m-d H:i:s', self::$time_start).' run '.$run_time_str." \n"; - - $display_str .= 'spider name: '.self::$configs['name']."\n"; - if (self::$multiserver) - { - $display_str .= 'server id: '.self::$serverid."\n"; - } - $display_str .= 'task number: '.self::$tasknum."\n"; - $display_str .= 'load average: '.implode(', ', $loadavg)."\n"; - $display_str .= "document: https://doc.phpspider.org\n"; - - $display_str .= $this->display_task_ui(); - - if (self::$multiserver) - { - $display_str .= $this->display_server_ui(); - } - - $display_str .= $this->display_collect_ui(); - - // 清屏 - //$this->clear_echo(); - // 返回到第一行,第一列 - //echo "\033[0;0H"; - $display_str .= "---------------------------------------------------------------------\n"; - $display_str .= 'Press Ctrl-C to quit. Start success.'.date('Y-m-d H:i:s').' - '.round(memory_get_usage() / 1024 / 1024, 2).'MB'."\n"; - if (self::$terminate) - { - $display_str .= "\n\033[33mWait for the process exits...\033[0m"; - } - //echo $display_str; - $this->replace_echo($display_str); - } - - public function display_task_ui() - { - $display_str = "-------------------------------\033[47;30m TASKS \033[0m-------------------------------\n"; - - $display_str .= "\033[47;30mtaskid\033[0m". str_pad('', self::$taskid_length+2-strlen('taskid')). - "\033[47;30mtaskpid\033[0m". str_pad('', self::$pid_length+2-strlen('taskpid')). - "\033[47;30mmem\033[0m". str_pad('', self::$mem_length+2-strlen('mem')). - "\033[47;30mcollect succ\033[0m". str_pad('', self::$urls_length-strlen('collect succ')). - "\033[47;30mcollect fail\033[0m". str_pad('', self::$urls_length-strlen('collect fail')). - "\033[47;30mspeed\033[0m". str_pad('', self::$speed_length+2-strlen('speed')). - "\n"; - - // "\033[32;40m [OK] \033[0m" - $task_status = $this->get_task_status_list(self::$serverid, self::$tasknum); - foreach ($task_status as $json) - { - $task = json_decode($json, true); - if (empty($task)) - { - continue; - } - $display_str .= str_pad($task['id'], self::$taskid_length + 2). - str_pad($task['pid'], self::$pid_length + 2). - str_pad($task['mem'].'MB', self::$mem_length + 2). - str_pad($task['collect_succ'], self::$urls_length). - str_pad($task['collect_fail'], self::$urls_length). - str_pad($task['speed'].'/s', self::$speed_length + 2). - "\n"; - } - //echo "\033[9;0H"; - return $display_str; - } - - public function display_server_ui() - { - $display_str = "-------------------------------\033[47;30m SERVER \033[0m------------------------------\n"; - - $display_str .= "\033[47;30mserver\033[0m". str_pad('', self::$server_length+2-strlen('serverid')). - "\033[47;30mtasknum\033[0m". str_pad('', self::$tasknum_length+2-strlen('tasknum')). - "\033[47;30mmem\033[0m". str_pad('', self::$mem_length+2-strlen('mem')). - "\033[47;30mcollect succ\033[0m". str_pad('', self::$urls_length-strlen('collect succ')). - "\033[47;30mcollect fail\033[0m". str_pad('', self::$urls_length-strlen('collect fail')). - "\033[47;30mspeed\033[0m". str_pad('', self::$speed_length+2-strlen('speed')). - "\n"; - - $server_list_json = queue::get('server_list'); - $server_list = json_decode($server_list_json, true); - foreach ($server_list as $server) - { - $serverid = $server['serverid']; - $tasknum = $server['tasknum']; - $mem = 0; - $speed = 0; - $collect_succ = $collect_fail = 0; - $task_status = $this->get_task_status_list($serverid, $tasknum); - foreach ($task_status as $json) - { - $task = json_decode($json, true); - if (empty($task)) - { - continue; - } - $mem += $task['mem']; - $speed += $task['speed']; - $collect_fail += $task['collect_fail']; - $collect_succ += $task['collect_succ']; - } - - $display_str .= str_pad($serverid, self::$server_length). - str_pad($tasknum, self::$tasknum_length + 2). - str_pad($mem.'MB', self::$mem_length + 2). - str_pad($collect_succ, self::$urls_length). - str_pad($collect_fail, self::$urls_length). - str_pad($speed.'/s', self::$speed_length + 2). - "\n"; - } - return $display_str; - } - - public function display_collect_ui() - { - $display_str = "---------------------------\033[47;30m COLLECT STATUS \033[0m--------------------------\n"; - - $display_str .= "\033[47;30mfind pages\033[0m". str_pad('', 16-strlen('find pages')). - "\033[47;30mqueue\033[0m". str_pad('', 14-strlen('queue')). - "\033[47;30mcollected\033[0m". str_pad('', 15-strlen('collected')). - "\033[47;30mfields\033[0m". str_pad('', 15-strlen('fields')). - "\033[47;30mdepth\033[0m". str_pad('', 12-strlen('depth')). - "\n"; - - $collect = $this->get_collect_url_num(); - $collected = $this->get_collected_url_num(); - $queue = $this->queue_lsize(); - $fields = $this->get_fields_num(); - $depth = $this->get_depth_num(); - $display_str .= str_pad($collect, 16); - $display_str .= str_pad($queue, 14); - $display_str .= str_pad($collected, 15); - $display_str .= str_pad($fields, 15); - $display_str .= str_pad($depth, 12); - $display_str .= "\n"; - return $display_str; - } - - /** - * 判断是否附件文件 - * - * @return void - * @author seatle - * @created time :2016-09-23 17:13 - */ - //public function is_attachment_file($url) - //{ - //$mime_types = $GLOBALS['config']['mimetype']; - //$mime_types_flip = array_flip($mime_types); - - //$pathinfo = pathinfo($url); - //$fileext = isset($pathinfo['extension']) ? $pathinfo['extension'] : ''; - - //$fileinfo = array(); - //// 存在文件后缀并且是配置里面的后缀 - //if (!empty($fileext) && isset($mime_types_flip[$fileext])) - //{ - //stream_context_set_default( - //array( - //'http' => array( - //'method' => 'HEAD' - //) - //) - //); - //// 代理和Cookie以后实现, 方法和 file_get_contents 一样 使用 stream_context_create 设置 - //$headers = get_headers($url, 1); - //if (strpos($headers[0], '302')) - //{ - //$url = $headers['Location']; - //$headers = get_headers($url, 1); - //} - ////print_r($headers); - //$fileinfo = array( - //'basename' => isset($pathinfo['basename']) ? $pathinfo['basename'] : '', - //'filename' => isset($pathinfo['filename']) ? $pathinfo['filename'] : '', - //'fileext' => isset($pathinfo['extension']) ? $pathinfo['extension'] : '', - //'filesize' => isset($headers['Content-Length']) ? $headers['Content-Length'] : 0, - //'atime' => isset($headers['Date']) ? strtotime($headers['Date']) : time(), - //'mtime' => isset($headers['Last-Modified']) ? strtotime($headers['Last-Modified']) : time(), - //); - - //$mime_type = 'html'; - //$content_type = isset($headers['Content-Type']) ? $headers['Content-Type'] : ''; - //if (!empty($content_type)) - //{ - //$mime_type = isset($GLOBALS['config']['mimetype'][$content_type]) ? $GLOBALS['config']['mimetype'][$content_type] : $mime_type; - //} - //$mime_types_flip = array_flip($mime_types); - //// 判断一下是不是文件名被加什么后缀了, 比如 http://www.xxxx.com/test.jpg?token=xxxxx - //if (!isset($mime_types_flip[$fileinfo['fileext']])) - //{ - //$fileinfo['fileext'] = $mime_type; - //$fileinfo['basename'] = $fileinfo['filename'].'.'.$mime_type; - //} - //} - //return $fileinfo; - //} - - //返回当前是否是主进程 - public function is_taskmaster() - { - return self::$taskmaster; - } - - //返回当前是否进程ID - public function get_task_id() - { - return self::$taskid; - } - - //检测子域名数量 - public function sub_domain_count($url) - { - if (empty($url)) - { - return 0; - } - $count = 0; - $domain = $this->getRootDomain($url, 'root'); - if (empty($domain)) - { - return 0; - } - $host = $this->getRootDomain($url, 'host'); - if (empty($host)) - { - return $count; - } - if (self::$use_redis) - { - $count = queue::get($domain); - if ( ! empty(self::$configs['max_sub_num']) and $count > self::$configs['max_sub_num']) - { - return $count; - } - if (strlen($host) > 32) - { - $host = md5($host); - } - $hostkey = 'sub_d-'.$host; - $exists = queue::exists($hostkey); - if ( ! $exists) - { - // 子域名数量加一 - $count = queue::incr($domain); - queue::set($hostkey, 1); - } - } - return $count; - } - - //提取url的根域名 host domain subdomain name tld - public function getRootDomain($url = '', $type = 'root', $domain_check = false) - { - if (empty($url)) - { - return $url; - } - $url = trim($url); - if ( ! preg_match('/^http/i', $url)) - { - $url = 'http://'.$url; - } - //截取限定字符 - $arr = array(); - if (preg_match_all('/(^https?:\/\/[\p{Han}a-zA-Z0-9\-\.\/]+)/iu', $url, $arr)) - { - $url = $arr['0']['0']; - unset($arr); - } - $url_parse = parse_url(strtolower($url)); - if (empty($url_parse['host'])) - { - return ''; - } - //host判断快速返回 - if ($domain_check === false and $type == 'host') - { - return $url_parse['host']; - } - - //结束数组初始化 - $res = array( - 'scheme' => '', - 'host' => '', - 'path' => '', - 'name' => '', - 'domain' => '', - ); - - $urlarr = explode('.', $url_parse['host']); - $count = count($urlarr); - $res['scheme'] = $url_parse['scheme']; - $res['host'] = $url_parse['host']; - if ( ! empty($url_parse['path'])) - { - $res['path'] = $url_parse['path']; - } - #列举域名中固定元素 - $state_domain = array('com', 'edu', 'gov', 'int', 'mil', 'net', 'org', 'biz', 'info', 'pro', 'name', 'coop', 'aero', 'xxx', 'idv', 'mobi', 'cc', 'me', 'jp', 'uk', 'ws', 'eu', 'pw', 'kr', 'io', 'us', 'cn', 'al', 'dz', 'af', 'ar', 'ae', 'aw', 'om', 'az', 'eg', 'et', 'ie', 'ee', 'ad', 'ao', 'ai', 'ag', 'at', 'au', 'mo', 'bb', 'pg', 'bs', 'pk', 'py', 'ps', 'bh', 'pa', 'br', 'by', 'bm', 'bg', 'mp', 'bj', 'be', 'is', 'pr', 'ba', 'pl', 'bo', 'bz', 'bw', 'bt', 'bf', 'bi', 'bv', 'kp', 'gq', 'dk', 'de', 'tl', 'tp', 'tg', 'dm', 'do', 'ru', 'ec', 'er', 'fr', 'fo', 'pf', 'gf', 'tf', 'va', 'ph', 'fj', 'fi', 'cv', 'fk', 'gm', 'cg', 'cd', 'co', 'cr', 'gg', 'gd', 'gl', 'ge', 'cu', 'gp', 'gu', 'gy', 'kz', 'ht', 'nl', 'an', 'hm', 'hn', 'ki', 'dj', 'kg', 'gn', 'gw', 'ca', 'gh', 'ga', 'kh', 'cz', 'zw', 'cm', 'qa', 'ky', 'km', 'ci', 'kw', 'hr', 'ke', 'ck', 'lv', 'ls', 'la', 'lb', 'lt', 'lr', 'ly', 'li', 're', 'lu', 'rw', 'ro', 'mg', 'im', 'mv', 'mt', 'mw', 'my', 'ml', 'mk', 'mh', 'mq', 'yt', 'mu', 'mr', 'um', 'as', 'vi', 'mn', 'ms', 'bd', 'pe', 'fm', 'mm', 'md', 'ma', 'mc', 'mz', 'mx', 'nr', 'np', 'ni', 'ne', 'ng', 'nu', 'no', 'nf', 'na', 'za', 'aq', 'gs', 'pn', 'pt', 'se', 'ch', 'sv', 'yu', 'sl', 'sn', 'cy', 'sc', 'sa', 'cx', 'st', 'sh', 'kn', 'lc', 'sm', 'pm', 'vc', 'lk', 'sk', 'si', 'sj', 'sz', 'sd', 'sr', 'sb', 'so', 'tj', 'tw', 'th', 'tz', 'to', 'tc', 'tt', 'tn', 'tv', 'tr', 'tm', 'tk', 'wf', 'vu', 'gt', 've', 'bn', 'ug', 'ua', 'uy', 'uz', 'es', 'eh', 'gr', 'hk', 'sg', 'nc', 'nz', 'hu', 'sy', 'jm', 'am', 'ac', 'ye', 'iq', 'ir', 'il', 'it', 'in', 'id', 'vg', 'jo', 'vn', 'zm', 'je', 'td', 'gi', 'cl', 'cf', 'yr', 'arpa', 'museum', 'asia', 'ax', 'bl', 'bq', 'cat', 'cw', 'gb', 'jobs', 'mf', 'rs', 'su', 'sx', 'tel', 'travel', 'shop', 'ltd', 'store', 'vip', '网店', '中国', '公司', '网络', 'co.il', 'co.nz', 'co.uk', 'me.uk', 'org.uk', 'com.sb', '在线', '中文网', '移动', 'wang', 'club', 'ren', 'top', 'website', 'cool', 'company', 'city', 'email', 'market', 'software', 'ninja', '我爱你', 'bike', 'today', 'life', 'space', 'pub', 'site', 'help', 'link', 'photo', 'video', 'click', 'pics', 'sexy', 'audio', 'gift', 'tech', '网址', 'online', 'win', 'download', 'party', 'bid', 'loan', 'date', 'trade', 'red', 'blue', 'pink', 'poker', 'green', 'farm', 'zone', 'guru', 'tips', 'land', 'care', 'camp', 'cab', 'cash', 'limo', 'toys', 'tax', 'town', 'fish', 'fund', 'fail', 'house', 'shoes', 'media', 'guide', 'tools', 'solar', 'watch', 'cheap', 'rocks', 'news', 'live', 'lawyer', 'host', 'wiki', 'ink', 'design', 'lol', 'hiphop', 'hosting', 'diet', 'flowers', 'car', 'cars', 'auto', 'mom', 'cq', 'he', 'nm', 'ln', 'jl', 'hl', 'js', 'zj', 'ah', 'jx', 'ha', 'hb', 'gx', 'hi', 'gz', 'yn', 'xz', 'qh', 'nx', 'xj', 'xyz', 'xin', 'science', 'press', 'band', 'engineer', 'social', 'studio', 'work', 'game', 'kim', 'games', 'group', '集团'); - if ($count <= 2) - { - #当域名直接根形式不存在host部分直接输出 - $last = array_pop($urlarr); - $last_1 = array_pop($urlarr); - if (in_array($last, $state_domain)) - { - $res['domain'] = $last_1.'.'.$last; - $res['name'] = $last_1; - $res['tld'] = $last; - } - } - elseif ($count > 2) - { - $last = array_pop($urlarr); - $last_1 = array_pop($urlarr); - $last_2 = array_pop($urlarr); - $res['domain'] = $last_1.'.'.$last; //默认为n.com形式 - $res['name'] = $last_2; - - //排除非标准 ltd 域名 - if ( ! in_array($last, $state_domain)) - { - return false; - } - - if (in_array($last, $state_domain)) - { - $res['domain'] = $last_1.'.'.$last; //n.com形式 - $res['name'] = $last_1; - $res['tld'] = $last; - } - //排除顶级根二级后缀 - if ($last_1 !== $last and in_array($last_1, $state_domain) and ! in_array($last, array('com', 'net', 'org', 'edu', 'gov'))) - { - $res['domain'] = $last_2.'.'.$last_1.'.'.$last; //n.n.com形式 - $res['name'] = $last_2; - $res['tld'] = $last_1.'.'.$last; - } - //限定cn顶级根二级后缀为'com', 'net', 'org', 'edu', 'gov' - if (in_array($last, array('cn')) and $last_1 !== $last and strlen($last_1) > 2 and ! in_array($last_1, array('com', 'net', 'org', 'edu', 'gov'))) - { - $res['domain'] = $last_1.'.'.$last; //n.n.cn形式 - $res['name'] = $last_1; - $res['tld'] = $last; - } - } - - //检测和验证返回的是不是域名格式 - if ( ! empty($res['domain']) and preg_match('/^([\p{Han}a-zA-Z0-9])+([\p{Han}a-zA-Z0-9\-])*\.[a-zA-Z\.\p{Han}]+$/iu', $res['domain'])) - { - if ($type == 'arr') - { - return $res; - } - elseif ($type == 'host') - { - return $res['host']; - } - elseif ($type == 'tld') - { - return $res['tld']; - } - elseif ($type == 'subdomain') - { - return $res['name']; - } - else - { - return $res['domain']; - } - } - else - { - return ''; - } - } - -} diff --git a/vendor/owner888/phpspider/core/queue.php b/vendor/owner888/phpspider/core/queue.php deleted file mode 100644 index 3ea4149..0000000 --- a/vendor/owner888/phpspider/core/queue.php +++ /dev/null @@ -1,1388 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider Redis操作类文件 -//---------------------------------- - -namespace phpspider\core; - -use Exception; -use Redis; - -class queue -{ - /** - * redis链接标识符号 - */ - protected static $redis = NULL; - - /** - * redis配置数组 - */ - protected static $configs = array(); - private static $links = array(); - private static $link_name = 'default'; - - /** - * 默认redis前缀 - */ - public static $prefix = 'phpspider'; - - public static $error = ''; - - public static function init() - { - if ( ! extension_loaded('redis')) - { - self::$error = 'The redis extension was not found'; - return false; - } - - // 获取配置 - $config = self::$link_name == 'default' ? self::_get_default_config() : self::$configs[self::$link_name]; - - // 如果当前链接标识符为空,或者ping不同,就close之后重新打开 - //if ( empty(self::$links[self::$link_name]) || !self::ping() ) - if (empty(self::$links[self::$link_name])) - { - self::$links[self::$link_name] = new Redis(); - if (strstr($config['host'], '.sock')) - { - if ( ! self::$links[self::$link_name]->connect($config['host'])) - { - self::$error = 'Unable to connect to redis server'; - unset(self::$links[self::$link_name]); - return false; - } - } - else - { - if ( ! self::$links[self::$link_name]->connect($config['host'], $config['port'], $config['timeout'])) - { - self::$error = 'Unable to connect to redis server'; - unset(self::$links[self::$link_name]); - return false; - } - } - - // 验证 - if ($config['pass']) - { - if ( ! self::$links[self::$link_name]->auth($config['pass'])) - { - self::$error = 'Redis Server authentication failed'; - unset(self::$links[self::$link_name]); - return false; - } - } - - $prefix = empty($config['prefix']) ? self::$prefix : $config['prefix']; - self::$links[self::$link_name]->setOption(Redis::OPT_PREFIX, $prefix.':'); - // 永不超时 - // ini_set('default_socket_timeout', -1); 无效,要用下面的做法 - self::$links[self::$link_name]->setOption(Redis::OPT_READ_TIMEOUT, -1); - self::$links[self::$link_name]->select($config['db']); - } - - return self::$links[self::$link_name]; - } - - public static function clear_link() - { - if(self::$links) - { - foreach(self::$links as $k=>$v) - { - $v->close(); - unset(self::$links[$k]); - } - } - } - - public static function set_connect($link_name, $config = array()) - { - self::$link_name = $link_name; - if (!empty($config)) - { - self::$configs[self::$link_name] = $config; - } - else - { - if (empty(self::$configs[self::$link_name])) - { - throw new Exception('You not set a config array for connect!'); - } - } - //print_r(self::$configs); - - //// 先断开原来的连接 - //if ( !empty(self::$links[self::$link_name]) ) - //{ - //self::$links[self::$link_name]->close(); - //self::$links[self::$link_name] = null; - //} - } - - public static function set_connect_default() - { - $config = self::_get_default_config(); - self::set_connect('default', $config); - } - - /** - * 获取默认配置 - */ - protected static function _get_default_config() - { - if (empty(self::$configs['default'])) - { - if (!is_array($GLOBALS['config']['redis'])) - { - exit('cls_redis.php _get_default_config()' . '没有redis配置'); - // You not set a config array for connect\nPlease check the configuration file config/inc_config.php - } - self::$configs['default'] = $GLOBALS['config']['redis']; - } - return self::$configs['default']; - } - - /** - * set - * - * @param mixed $key 键 - * @param mixed $value 值 - * @param int $expire 过期时间,单位:秒 - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function set($key, $value, $expire = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if ($expire > 0) - { - return self::$links[self::$link_name]->setex($key, $expire, $value); - } - else - { - return self::$links[self::$link_name]->set($key, $value); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::set($key, $value, $expire); - } - } - return NULL; - } - - - /** - * set - * - * @param mixed $key 键 - * @param mixed $value 值 - * @param int $expire 过期时间,单位:秒 - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function setnx($key, $value, $expire = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if ($expire > 0) - { - return self::$links[self::$link_name]->set($key, $value, array('nx', 'ex' => $expire)); - //self::$links[self::$link_name]->multi(); - //self::$links[self::$link_name]->setNX($key, $value); - //self::$links[self::$link_name]->expire($key, $expire); - //self::$links[self::$link_name]->exec(); - //return true; - } - else - { - return self::$links[self::$link_name]->setnx($key, $value); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::setnx($key, $value, $expire); - } - } - return NULL; - } - - /** - * 锁 - * 默认锁1秒 - * - * @param mixed $name 锁的标识名 - * @param mixed $value 锁的值,貌似没啥意义 - * @param int $expire 当前锁的最大生存时间(秒),必须大于0,超过生存时间系统会自动强制释放锁 - * @param int $interval 获取锁失败后挂起再试的时间间隔(微秒) - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public static function lock($name, $value = 1, $expire = 5, $interval = 100000) - { - if ($name == null) - { - return false; - } - - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - $key = "Lock:{$name}"; - while (true) - { - // 因为 setnx 没有 expire 设置,所以还是用set - //$result = self::$links[self::$link_name]->setnx($key, $value); - $result = self::$links[self::$link_name]->set($key, $value, array('nx', 'ex' => $expire)); - if ($result != false) - { - return true; - } - - usleep($interval); - } - return false; - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - // 睡眠100毫秒 - usleep(100000); - return self::lock($name, $value, $expire, $interval); - } - } - return false; - } - - public static function unlock($name) - { - $key = "Lock:{$name}"; - return self::del($key); - } - - /** - * get - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function get($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->get($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::get($key); - } - } - return NULL; - } - - /** - * del 删除数据 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function del($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->del($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::del($key); - } - } - return NULL; - } - - /** - * type 返回值的类型 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function type($key) - { - self::init(); - - $types = array( - '0' => 'set', - '1' => 'string', - '3' => 'list', - ); - - try - { - if ( self::$links[self::$link_name] ) - { - $type = self::$links[self::$link_name]->type($key); - if (isset($types[$type])) - { - return $types[$type]; - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::type($key); - } - } - return NULL; - } - - /** - * incr 名称为key的string增加integer, integer为0则增1 - * - * @param mixed $key - * @param int $integer - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function incr($key, $integer = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (empty($integer)) - { - return self::$links[self::$link_name]->incr($key); - } - else - { - return self::$links[self::$link_name]->incrby($key, $integer); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::incr($key, $integer); - } - } - return NULL; - } - - /** - * decr 名称为key的string减少integer, integer为0则减1 - * - * @param mixed $key - * @param int $integer - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function decr($key, $integer = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (empty($integer)) - { - return self::$links[self::$link_name]->decr($key); - } - else - { - return self::$links[self::$link_name]->decrby($key, $integer); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::decr($key, $integer); - } - } - return NULL; - } - - /** - * append 名称为key的string的值附加value - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function append($key, $value) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->append($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::append($key, $value); - } - } - return NULL; - } - - /** - * substr 返回名称为key的string的value的子串 - * - * @param mixed $key - * @param mixed $start - * @param mixed $end - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function substr($key, $start, $end) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->substr($key, $start, $end); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::substr($key, $start, $end); - } - } - return NULL; - } - - /** - * select 按索引查询 - * - * @param mixed $index - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function select($index) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->select($index); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::select($index); - } - } - return NULL; - } - - /** - * dbsize 返回当前数据库中key的数目 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function dbsize() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->dbsize(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::dbsize(); - } - } - return 0; - } - - /** - * flushdb 删除当前选择数据库中的所有key - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function flushdb() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->flushdb(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::flushdb(); - } - } - return NULL; - } - - /** - * flushall 删除所有数据库中的所有key - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function flushall() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->flushall(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::flushall(); - } - } - return NULL; - } - - /** - * save 将数据保存到磁盘 - * - * @param mixed $is_bgsave 将数据异步保存到磁盘 - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function save($is_bgsave = false) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (!$is_bgsave) - { - return self::$links[self::$link_name]->save(); - } - else - { - return self::$links[self::$link_name]->bgsave(); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::save($is_bgsave); - } - } - return NULL; - } - - /** - * info 提供服务器的信息和统计 - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function info() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->info(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::info(); - } - } - return NULL; - } - - /** - * slowlog 慢查询日志 - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function slowlog($command = 'get', $len = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (!empty($len)) - { - return $redis->slowlog($command, $len); - } - else - { - return $redis->slowlog($command); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::slowlog($command, $len); - } - } - return NULL; - } - - /** - * lastsave 返回上次成功将数据保存到磁盘的Unix时戳 - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function lastsave() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lastsave(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lastsave(); - } - } - return NULL; - } - - /** - * lpush 将数据从左边压入 - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lpush($key, $value) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lpush($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lpush($key, $value); - } - } - return NULL; - } - - /** - * rpush 将数据从右边压入 - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function rpush($key, $value) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->rpush($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::rpush($key, $value); - } - } - return NULL; - } - - /** - * lpop 从左边弹出数据, 并删除数据 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lpop($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lpop($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lpop($key); - } - } - return NULL; - } - - /** - * rpop 从右边弹出数据, 并删除数据 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function rpop($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->rpop($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::rpop($key); - } - } - return NULL; - } - - /** - * lsize 队列长度,同llen - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lsize($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lSize($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lsize($key); - } - } - return NULL; - } - - /** - * lget 获取数据 - * - * @param mixed $key - * @param int $index - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lget($key, $index = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lget($key, $index); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lget($key, $index); - } - } - return NULL; - } - - /** - * lRange 获取范围数据 - * - * @param mixed $key - * @param mixed $start - * @param mixed $end - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lrange($key, $start, $end) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lRange($key, $start, $end); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lrange($key, $start, $end); - } - } - return NULL; - } - - /** - * rlist 从右边弹出 $length 长度数据,并删除数据 - * - * @param mixed $key - * @param mixed $length - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function rlist($key, $length) - { - $queue_length = self::lsize($key); - // 如果队列中有数据 - if ($queue_length > 0) - { - $list = array(); - $count = ($queue_length >= $length) ? $length : $queue_length; - for ($i = 0; $i < $count; $i++) - { - $data = self::rpop($key); - if ($data === false) - { - continue; - } - - $list[] = $data; - } - return $list; - } - else - { - // 没有数据返回NULL - return NULL; - } - } - - /** - * keys - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - * 查找符合给定模式的key。 - * KEYS *命中数据库中所有key。 - * KEYS h?llo命中hello, hallo and hxllo等。 - * KEYS h*llo命中hllo和heeeeello等。 - * KEYS h[ae]llo命中hello和hallo,但不命中hillo。 - * 特殊符号用"\"隔开 - * 因为这个类加了OPT_PREFIX前缀,所以并不能真的列出redis所有的key,需要的话,要把前缀去掉 - */ - public static function keys($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->keys($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::keys($key); - } - } - return NULL; - } - - /** - * ttl 返回某个KEY的过期时间 - * 正数:剩余多少秒 - * -1:永不超时 - * -2:key不存在 - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function ttl($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->ttl($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::ttl($key); - } - } - return NULL; - } - - /** - * expire 为某个key设置过期时间,同setTimeout - * - * @param mixed $key - * @param mixed $expire - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function expire($key, $expire) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->expire($key, $expire); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::expire($key, $expire); - } - } - return NULL; - } - - /** - * exists key值是否存在 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function exists($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->exists($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::exists($key); - } - } - return false; - } - - /** - * ping 检查当前redis是否存在且是否可以连接上 - * - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - //protected static function ping() - //{ - //if ( empty (self::$links[self::$link_name]) ) - //{ - //return false; - //} - //return self::$links[self::$link_name]->ping() == '+PONG'; - //} - - public static function encode($value) - { - return json_encode($value, JSON_UNESCAPED_UNICODE); - } - - public static function decode($value) - { - return json_decode($value, true); - } - - /** - * 集合操作 - */ - - /** - * sadd 将数据压入集合 - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function sadd($key, $value) - { - self::init(); - try - { - if (self::$links[self::$link_name]) - { - return self::$links[self::$link_name]->sadd($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::sadd($key, $value); - } - } - return null; - } - - /** - * spop 从集合中随机取出数据并移除 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function spop($key) - { - self::init(); - try - { - if (self::$links[self::$link_name]) - { - return self::$links[self::$link_name]->spop($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::spop($key); - } - } - return null; - } - - /** - * Redis Scard 命令返回集合中元素的数量。 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function scard($key) - { - self::init(); - try - { - if (self::$links[self::$link_name]) - { - return self::$links[self::$link_name]->scard($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::scard($key); - } - } - return null; - } - -} diff --git a/vendor/owner888/phpspider/core/requests.php b/vendor/owner888/phpspider/core/requests.php deleted file mode 100644 index 5d15787..0000000 --- a/vendor/owner888/phpspider/core/requests.php +++ /dev/null @@ -1,998 +0,0 @@ - -// +---------------------------------------------------------------------- - -// +---------------------------------------------------------------------- -// | GET请求 -// | requests::get('http://www.test.com'); -// | SERVER -// | $_GET -// +---------------------------------------------------------------------- -// | POST请求 -// | $data = array('name'=>'request'); -// | requests::post('http://www.test.com', $data); -// | SERVER -// | $_POST -// +---------------------------------------------------------------------- -// | POST RESTful请求 -// | $data = array('name'=>'request'); -// | $data_string = json_encode($data); -// | requests::set_header("Content-Type", "application/json"); -// | requests::post('http://www.test.com', $data_string); -// | SERVER -// | file_get_contents('php://input') -// +---------------------------------------------------------------------- -// | POST 文件上传 -// | $data = array('file1'=>''./data/phpspider.log''); -// | requests::post('http://www.test.com', null, $data); -// | SERVER -// | $_FILES -// +---------------------------------------------------------------------- -// | 代理 -// | requests::set_proxy(array('223.153.69.150:42354')); -// | $html = requests::get('https://www.test.com'); -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider请求类文件 -//---------------------------------- - -namespace phpspider\core; - -if (!function_exists('curl_file_create')) -{ - function curl_file_create($filename, $mimetype = '', $postname = '') - { - return "@$filename;filename=" - . ($postname ?: basename($filename)) - . ($mimetype ? ";type=$mimetype" : ''); - } -} - -class requests -{ - const VERSION = '2.0.1'; - - protected static $ch = null; - - /**** Public variables ****/ - - /* user definable vars */ - - public static $timeout = 15; - public static $encoding = null; - public static $input_encoding = null; - public static $output_encoding = null; - public static $cookies = array(); // array of cookies to pass - // $cookies['username'] = "seatle"; - public static $rawheaders = array(); // array of raw headers to send - public static $domain_cookies = array(); // array of cookies for domain to pass - public static $hosts = array(); // random host binding for make request faster - public static $headers = array(); // headers returned from server sent here - public static $useragents = array("requests/2.0.0"); // random agent we masquerade as - public static $client_ips = array(); // random ip we masquerade as - public static $proxies = array(); // random proxy ip - public static $raw = ""; // head + body content returned from server sent here - public static $head = ""; // head content - public static $content = ""; // The body before encoding - public static $text = ""; // The body after encoding - public static $info = array(); // curl info - public static $history = 302; // http request status before redirect. ex:30x - public static $status_code = 0; // http request status - public static $error = ""; // error messages sent here - - /** - * set timeout - * $timeout 为数组时会分别设置connect和read - * - * @param init or array $timeout - * @return - */ - public static function set_timeout($timeout) - { - self::$timeout = $timeout; - } - - /** - * 设置代理 - * 如果代理有多个,请求时会随机使用 - * - * @param mixed $proxies - * array ( - * 'socks5://user1:pass2@host:port', - * 'socks5://user2:pass2@host:port' - *) - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function set_proxy($proxy) - { - self::$proxies = is_array($proxy) ? $proxy : array($proxy); - } - - /** - * 删除代理 - * 因为每个链接信息里面都有代理信息,有的链接需要,有的不需要,所以必须提供一个删除功能 - * - * @return void - * @author seatle - * @created time :2018-07-16 17:59 - */ - public static function del_proxy() - { - self::$proxies = array(); - } - - /** - * 自定义请求头部 - * 请求头内容可以用 requests::$rawheaders 来获取 - * 比如获取Content-Type:requests::$rawheaders['Content-Type'] - * - * @param string $headers - * @return void - */ - public static function set_header($key, $value) - { - self::$rawheaders[$key] = $value; - } - - /** - * 设置全局COOKIE - * - * @param string $cookie - * @return void - */ - public static function set_cookie($key, $value, $domain = '') - { - if (empty($key)) - { - return false; - } - if (!empty($domain)) - { - self::$domain_cookies[$domain][$key] = $value; - } - else - { - self::$cookies[$key] = $value; - } - return true; - } - - /** - * 批量设置全局cookie - * - * @param mixed $cookies - * @param string $domain - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function set_cookies($cookies, $domain = '') - { - $cookies_arr = explode(';', $cookies); - if (empty($cookies_arr)) - { - return false; - } - - foreach ($cookies_arr as $cookie) - { - $cookie_arr = explode('=', $cookie, 2); - $key = $cookie_arr[0]; - $value = empty($cookie_arr[1]) ? '' : $cookie_arr[1]; - - if (!empty($domain)) - { - self::$domain_cookies[$domain][$key] = $value; - } - else - { - self::$cookies[$key] = $value; - } - } - return true; - } - - /** - * 获取单一Cookie - * - * @param mixed $name cookie名称 - * @param string $domain 不传则取全局cookie,就是手动set_cookie的cookie - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function get_cookie($name, $domain = '') - { - if (!empty($domain) && !isset(self::$domain_cookies[$domain])) - { - return ''; - } - $cookies = empty($domain) ? self::$cookies : self::$domain_cookies[$domain]; - return isset($cookies[$name]) ? $cookies[$name] : ''; - } - - /** - * 获取Cookie数组 - * - * @param string $domain 不传则取全局cookie,就是手动set_cookie的cookie - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function get_cookies($domain = '') - { - if (!empty($domain) && !isset(self::$domain_cookies[$domain])) - { - return array(); - } - return empty($domain) ? self::$cookies : self::$domain_cookies[$domain]; - } - - /** - * 删除Cookie - * - * @param string $domain 不传则删除全局Cookie - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function del_cookie($key, $domain = '') - { - if (empty($key)) - { - return false; - } - - if (!empty($domain) && !isset(self::$domain_cookies[$domain])) - { - return false; - } - - if (!empty($domain)) - { - if (isset(self::$domain_cookies[$domain][$key])) - { - unset(self::$domain_cookies[$domain][$key]); - } - } - else - { - if (isset(self::$cookies[$key])) - { - unset(self::$cookies[$key]); - } - } - return true; - } - - /** - * 删除Cookie - * - * @param string $domain 不传则删除全局Cookie - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function del_cookies($domain = '') - { - if (!empty($domain) && !isset(self::$domain_cookies[$domain])) - { - return false; - } - if ( empty($domain) ) - { - self::$cookies = array(); - } - else - { - if (isset(self::$domain_cookies[$domain])) - { - unset(self::$domain_cookies[$domain]); - } - } - return true; - } - - /** - * 设置随机的user_agent - * - * @param string $useragent - * @return void - */ - public static function set_useragent($useragent) - { - self::$useragents = is_array($useragent) ? $useragent : array($useragent); - } - - /** - * set referer - * - */ - public static function set_referer($referer) - { - self::$rawheaders['Referer'] = $referer; - } - - /** - * 设置伪造IP - * 传入数组则为随机IP - * @param string $ip - * @return void - */ - public static function set_client_ip($ip) - { - self::$client_ips = is_array($ip) ? $ip : array($ip); - } - - /** - * 删除伪造IP - * - * @return void - * @author seatle - * @created time :2018-07-16 17:59 - */ - public static function del_client_ip() - { - self::$client_ips = array(); - } - - /** - * 设置中文请求 - * - * @param string $lang - * @return void - */ - public static function set_accept_language($lang = 'zh-CN') - { - self::$rawheaders['Accept-Language'] = $lang; - } - - /** - * 设置Hosts - * 负载均衡到不同的服务器,如果对方使用CDN,采用这个是最好的了 - * - * @param string $hosts - * @return void - */ - public static function set_hosts($host, $ips = array()) - { - $ips = is_array($ips) ? $ips : array($ips); - self::$hosts[$host] = $ips; - } - - /** - * 分割返回的header和body - * header用来判断编码和获取Cookie - * body用来判断编码,得到编码前和编码后的内容 - * - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function split_header_body() - { - $head = $body = ''; - $head = substr(self::$raw, 0, self::$info['header_size']); - $body = substr(self::$raw, self::$info['header_size']); - // http header - self::$head = $head; - // The body before encoding - self::$content = $body; - - //$http_headers = array(); - //// 解析HTTP数据流 - //if (!empty(self::$raw)) - //{ - //self::get_response_cookies($domain); - //// body里面可能有 \r\n\r\n,但是第一个一定是HTTP Header,去掉后剩下的就是body - //$array = explode("\r\n\r\n", self::$raw); - //foreach ($array as $k=>$v) - //{ - //// post 方法会有两个http header:HTTP/1.1 100 Continue、HTTP/1.1 200 OK - //if (preg_match("#^HTTP/.*? 100 Continue#", $v)) - //{ - //unset($array[$k]); - //continue; - //} - //if (preg_match("#^HTTP/.*? \d+ #", $v)) - //{ - //$header = $v; - //unset($array[$k]); - //$http_headers = self::get_response_headers($v); - //} - //} - //$body = implode("\r\n\r\n", $array); - //} - - // 设置了输出编码的转码,注意: xpath只支持utf-8,iso-8859-1 不要转,他本身就是utf-8 - $body = self::encoding($body); //自动转码 - // 转码后 - self::$encoding = self::$output_encoding; - - // The body after encoding - self::$text = $body; - return array($head, $body); - } - - /** - * 获得域名相对应的Cookie - * - * @param mixed $header - * @param mixed $domain - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function get_response_cookies($header, $domain) - { - // 解析Cookie并存入 self::$cookies 方便调用 - preg_match_all("/.*?Set\-Cookie: ([^\r\n]*)/i", $header, $matches); - $cookies = empty($matches[1]) ? array() : $matches[1]; - - // 解析到Cookie - if (!empty($cookies)) - { - $cookies = implode(';', $cookies); - $cookies = explode(';', $cookies); - foreach ($cookies as $cookie) - { - $cookie_arr = explode('=', $cookie, 2); - // 过滤 httponly、secure - if (count($cookie_arr) < 2) - { - continue; - } - $cookie_name = !empty($cookie_arr[0]) ? trim($cookie_arr[0]) : ''; - if (empty($cookie_name)) - { - continue; - } - // 过滤掉domain路径 - if (in_array(strtolower($cookie_name), array('path', 'domain', 'expires', 'max-age'))) - { - continue; - } - self::$domain_cookies[$domain][trim($cookie_arr[0])] = trim($cookie_arr[1]); - } - } - } - - /** - * 获得response header - * 此方法占时没有用到 - * - * @param mixed $header - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function get_response_headers($header) - { - $headers = array(); - $header_lines = explode("\n", $header); - if (!empty($header_lines)) - { - foreach ($header_lines as $line) - { - $header_arr = explode(':', $line, 2); - $key = empty($header_arr[0]) ? '' : trim($header_arr[0]); - $val = empty($header_arr[1]) ? '' : trim($header_arr[1]); - if (empty($key) || empty($val)) - { - continue; - } - $headers[$key] = $val; - } - } - self::$headers = $headers; - return self::$headers; - } - - /** - * 获取编码 - * @param $string - * @return string - */ - public static function get_encoding($string) - { - $encoding = mb_detect_encoding($string, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1')); - return strtolower($encoding); - } - - /** - * 移除页面head区域代码 - * @param $html - * @return mixed - */ - private static function _remove_head($html) - { - return preg_replace('/.+<\/head>/is', '', $html); - } - - /** - * 简单的判断一下参数是否为一个URL链接 - * @param string $str - * @return boolean - */ - private static function _is_url($url) - { - //$pattern = '/^http(s)?:\\/\\/.+/'; - $pattern = "/\b(([\w-]+:\/\/?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|\/)))/"; - if (preg_match($pattern, $url)) - { - return true; - } - return false; - } - - /** - * 初始化 CURL - * - */ - public static function init() - { - if (!is_resource ( self::$ch )) - { - self::$ch = curl_init (); - curl_setopt( self::$ch, CURLOPT_RETURNTRANSFER, true ); - curl_setopt( self::$ch, CURLOPT_HEADER, false ); - curl_setopt( self::$ch, CURLOPT_USERAGENT, "phpspider-requests/".self::VERSION ); - // 如果设置了两个时间,就分开设置 - if (is_array(self::$timeout)) - { - curl_setopt( self::$ch, CURLOPT_CONNECTTIMEOUT, self::$timeout[0] ); - curl_setopt( self::$ch, CURLOPT_TIMEOUT, self::$timeout[1]); - } - else - { - curl_setopt(self::$ch, CURLOPT_CONNECTTIMEOUT, ceil(self::$timeout / 2)); - curl_setopt(self::$ch, CURLOPT_TIMEOUT, self::$timeout); - } - curl_setopt(self::$ch, CURLOPT_MAXREDIRS, 5); //maximum number of redirects allowed - // 在多线程处理场景下使用超时选项时,会忽略signals对应的处理函数,但是无耐的是还有小概率的crash情况发生 - curl_setopt( self::$ch, CURLOPT_NOSIGNAL, true); - } - return self::$ch; - } - - /** - * get 请求 - */ - public static function get($url, $fields = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - return self::request($url, 'get', $fields, NULL, $allow_redirects, $cert); - } - - /** - * post 请求 - * $fields 有三种类型:1、数组;2、http query;3、json - * 1、array('name'=>'yangzetao') - * 2、http_build_query(array('name'=>'yangzetao')) - * 3、json_encode(array('name'=>'yangzetao')) - * 前两种是普通的post,可以用$_POST方式获取 - * 第三种是post stream( json rpc,其实就是webservice ) - * 虽然是post方式,但是只能用流方式 http://input 后者 $HTTP_RAW_POST_DATA 获取 - * - * @param mixed $url - * @param array $fields - * @param mixed $proxies - * @static - * @access public - * @return void - */ - public static function post($url, $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - return self::request($url, 'POST', $fields, $files, $allow_redirects, $cert); - } - - public static function put($url, $fields = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - return self::request($url, 'PUT', $fields, $allow_redirects, $cert); - } - - public static function delete($url, $fields = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - return self::request($url, 'DELETE', $fields, $allow_redirects, $cert); - } - - // 响应HTTP头域里的元信息 - // 此方法被用来获取请求实体的元信息而不需要传输实体主体(entity-body) - // 此方法经常被用来测试超文本链接的有效性,可访问性,和最近的改变。. - public static function head($url, $fields = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - self::request($url, 'HEAD', $fields, $allow_redirects, $cert); - } - - public static function options($url, $fields = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - return self::request($url, 'OPTIONS', $fields, $allow_redirects, $cert); - } - - public static function patch($url, $fields = array(), $allow_redirects = true, $cert = NULL) - { - self::init (); - return self::request($url, 'PATCH', $fields, $allow_redirects, $cert); - } - - /** - * request - * - * @param mixed $url 请求URL - * @param string $method 请求方法 - * @param array $fields 表单字段 - * @param array $files 上传文件 - * @param mixed $cert CA证书 - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function request($url, $method = 'GET', $fields = array(), $files = array(), $allow_redirects = true, $cert = NULL) - { - $method = strtoupper($method); - if(!self::_is_url($url)) - { - self::$error = "You have requested URL ({$url}) is not a valid HTTP address"; - return false; - } - - // 如果是 get 方式,直接拼凑一个 url 出来 - if ($method == 'GET' && !empty($fields)) - { - $url = $url.(strpos($url, '?') === false ? '?' : '&').http_build_query($fields); - } - - $parse_url = parse_url($url); - if (empty($parse_url) || empty($parse_url['host']) || !in_array($parse_url['scheme'], array('http', 'https'))) - { - self::$error = "No connection adapters were found for '{$url}'"; - return false; - } - $scheme = $parse_url['scheme']; - $domain = $parse_url['host']; - - // 随机绑定 hosts,做负载均衡 - if (self::$hosts) - { - if (isset(self::$hosts[$domain])) - { - $hosts = self::$hosts[$domain]; - $key = rand(0, count($hosts)-1); - $ip = $hosts[$key]; - $url = str_replace($domain, $ip, $url); - self::$rawheaders['Host'] = $domain; - } - } - - curl_setopt( self::$ch, CURLOPT_URL, $url ); - - if ($method != 'GET') - { - // 如果是 post 方式 - if ($method == 'POST') - { - //curl_setopt( self::$ch, CURLOPT_POST, true ); - $tmpheaders = array_change_key_case(self::$rawheaders, CASE_LOWER); - // 有些RESTful服务只接受JSON形态的数据 - // CURLOPT_POST会把上傳的文件类型设为 multipart/form-data - // 把CURLOPT_POSTFIELDS的内容按multipart/form-data 的形式编码 - // CURLOPT_CUSTOMREQUEST可以按指定内容上传 - if ( isset($tmpheaders['content-type']) && $tmpheaders['content-type'] == 'application/json' ) - { - curl_setopt( self::$ch, CURLOPT_CUSTOMREQUEST, $method ); - } - else - { - curl_setopt( self::$ch, CURLOPT_POST, true ); - } - - $file_fields = array(); - if (!empty($files)) - { - foreach ($files as $postname => $file) - { - $filepath = realpath($file); - // 如果文件不存在 - if (!file_exists($filepath)) - { - continue; - } - - $filename = basename($filepath); - $type = self::get_mimetype($filepath); - $file_fields[$postname] = curl_file_create($filepath, $type, $filename); - // curl -F "name=seatle&file=@/absolute/path/to/image.png" htt://localhost/uploadfile.php - //$cfile = '@'.realpath($filename).";type=".$type.";filename=".$filename; - } - } - } - else - { - self::$rawheaders['X-HTTP-Method-Override'] = $method; - curl_setopt( self::$ch, CURLOPT_CUSTOMREQUEST, $method ); - } - - if ( $method == 'POST' ) - { - // 不是上传文件的,用http_build_query, 能实现更好的兼容性,更小的请求数据包 - if ( empty($file_fields) ) - { - // post方式 - if ( is_array($fields) ) - { - $fields = http_build_query($fields); - } - } - else - { - // 有post数据 - if ( is_array($fields) && !empty($fields) ) - { - // 某些server可能会有问题 - $fields = array_merge($fields, $file_fields); - } - else - { - $fields = $file_fields; - } - } - - // 不能直接传数组,不知道是什么Bug,会非常慢 - curl_setopt( self::$ch, CURLOPT_POSTFIELDS, $fields ); - } - } - - $cookies = self::get_cookies(); - $domain_cookies = self::get_cookies($domain); - $cookies = array_merge($cookies, $domain_cookies); - // 是否设置了cookie - if (!empty($cookies)) - { - foreach ($cookies as $key=>$value) - { - $cookie_arr[] = $key.'='.$value; - } - $cookies = implode('; ', $cookie_arr); - curl_setopt(self::$ch, CURLOPT_COOKIE, $cookies); - } - - if (!empty(self::$useragents)) - { - $key = rand(0, count(self::$useragents) - 1); - self::$rawheaders['User-Agent'] = self::$useragents[$key]; - } - - if (!empty(self::$client_ips)) - { - $key = rand(0, count(self::$client_ips) - 1); - self::$rawheaders['CLIENT-IP'] = self::$client_ips[$key]; - self::$rawheaders['X-FORWARDED-FOR'] = self::$client_ips[$key]; - } - - if (self::$rawheaders) - { - $http_headers = array(); - foreach (self::$rawheaders as $k=>$v) - { - $http_headers[] = $k.': '.$v; - } - curl_setopt( self::$ch, CURLOPT_HTTPHEADER, $http_headers ); - } - - curl_setopt( self::$ch, CURLOPT_ENCODING, 'gzip' ); - - // 关闭验证 - if ($scheme == 'https') - { - curl_setopt(self::$ch, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt(self::$ch, CURLOPT_SSL_VERIFYHOST, false); - } - - if (self::$proxies) - { - $key = rand(0, count(self::$proxies) - 1); - $proxy = self::$proxies[$key]; - curl_setopt( self::$ch, CURLOPT_PROXY, $proxy ); - } - - // header + body,header 里面有 cookie - curl_setopt( self::$ch, CURLOPT_HEADER, true ); - // 请求跳转后的内容 - if ($allow_redirects) - { - curl_setopt( self::$ch, CURLOPT_FOLLOWLOCATION, true); - } - - self::$raw = curl_exec ( self::$ch ); - // 真实url - //$location = curl_getinfo( self::$ch, CURLINFO_EFFECTIVE_URL); - self::$info = curl_getinfo( self::$ch ); - //print_r(self::$info); - self::$status_code = self::$info['http_code']; - if (self::$raw === false) - { - self::$error = 'Curl error: ' . curl_error( self::$ch ); - //trigger_error(self::$error, E_USER_WARNING); - } - - // 关闭句柄 - curl_close( self::$ch ); - - // 请求成功之后才把URL存起来 - list($header, $text) = self::split_header_body(); - self::$history = self::get_history($header); - self::$headers = self::get_response_headers($header); - self::get_response_cookies($header, $domain); - //$data = substr($data, 10); - //$data = gzinflate($data); - return $text; - } - - public static function get_history($header) - { - $status_code = 0; - $lines = explode("\n", $header); - foreach ($lines as $line) - { - $line = trim($line); - if (preg_match("#^HTTP/.*? (\d+) Found#", $line, $out)) - { - $status_code = empty($out[1]) ? 0 : intval($out[1]); - } - } - return $status_code; - } - - // 获取 mimetype - public static function get_mimetype($filepath) - { - $fp = finfo_open(FILEINFO_MIME); - $mime = finfo_file($fp, $filepath); - finfo_close($fp); - $arr = explode(';', $mime); - $type = empty($arr[0]) ? '' : $arr[0]; - return $type; - } - - /** - * 拼凑文件和表单 - * 占时没有用到 - * - * @param mixed $post_fields - * @param mixed $file_fields - * @return void - * @author seatle - * @created time :2017-08-03 18:06 - */ - public static function get_postfile_form($post_fields, $file_fields) - { - // 构造post数据 - $data = ''; - $delimiter = '-------------' . uniqid(); - // 表单数据 - foreach ($post_fields as $name => $content) - { - $data .= '--'.$delimiter."\r\n"; - $data .= 'Content-Disposition: form-data; name = "'.$name.'"'; - $data .= "\r\n\r\n"; - $data .= $content; - $data .= "\r\n"; - } - - foreach ($file_fields as $input_name => $file) - { - $data .= '--'.$delimiter."\r\n"; - $data .= 'Content-Disposition: form-data; name = "'.$input_name.'";'. - ' filename="'.$file['filename'].'"'."\r\n"; - $data .= "Content-Type: {$file['type']}\r\n"; - $data .= "\r\n"; - $data .= $file['content']; - $data .= "\r\n"; - } - - // 结束符 - $data .= '--'.$delimiter."--\r\n"; - - //return array( - //CURLOPT_HTTPHEADER => array( - //'Content-Type:multipart/form-data;boundary=' . $delimiter, - //'Content-Length:' . strlen($data) - //), - //CURLOPT_POST => true, - //CURLOPT_POSTFIELDS => $data, - //); - return array($delimiter, $data); - } - - /** - * html encoding transform - * - * @param string $html - * @param string $in - * @param string $out - * @param string $content - * @param string $mode - * auto|iconv|mb_convert_encoding - * @return string - */ - public static function encoding($html, $in = null, $out = null, $mode = 'auto') - { - $valid = array( - 'auto', - 'iconv', - 'mb_convert_encoding', - ); - if (isset(self::$output_encoding)) - { - $out = self::$output_encoding; - } - if ( ! isset($out)) - { - $out = 'UTF-8'; - } - if ( ! in_array($mode, $valid)) - { - throw new Exception('invalid mode, mode='.$mode); - } - $if = function_exists('mb_convert_encoding'); - $if = $if && ($mode == 'auto' || $mode == 'mb_convert_encoding'); - if (function_exists('iconv') && ($mode == 'auto' || $mode == 'iconv')) - { - $func = 'iconv'; - } - elseif ($if) - { - $func = 'mb_convert_encoding'; - } - else - { - throw new Exception('charsetTrans failed, no function'); - } - - $pattern = '/(]*?charset=([\"\']?))([a-z\d_\-]*)(\2[^>]*?>)/is'; - if ( ! isset($in)) - { - $n = preg_match($pattern, $html, $in); - if ($n > 0) - { - $in = $in[3]; - } - else - { - $in = null; - } - if (empty($in) and function_exists('mb_detect_encoding')) - { - $in = mb_detect_encoding($html, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5', 'ISO-8859-1')); - } - } - - if (isset($in)) - { - if ($in == 'ISO-8859-1') - { - $in = 'UTF-8'; - } - $old = error_reporting(error_reporting() & ~E_NOTICE); - $html = call_user_func($func, $in, $out.'//IGNORE', $html); - error_reporting($old); - $html = preg_replace($pattern, "\\1$out\\4", $html, 1); - } - return $html; - } -} diff --git a/vendor/owner888/phpspider/core/selector.php b/vendor/owner888/phpspider/core/selector.php deleted file mode 100644 index f17cff4..0000000 --- a/vendor/owner888/phpspider/core/selector.php +++ /dev/null @@ -1,588 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider选择器类文件 -//---------------------------------- - -namespace phpspider\core; - -use phpspider\library\phpquery; -use DOMDocument; -use DOMXpath; -use Exception; - -class selector -{ - /** - * 版本号 - * @var string - */ - const VERSION = '1.0.2'; - public static $dom = null; - public static $dom_auth = ''; - public static $xpath = null; - public static $error = null; - - public static function select($html, $selector, $selector_type = 'xpath') - { - if (empty($html) || empty($selector)) - { - return false; - } - - $selector_type = strtolower($selector_type); - if ($selector_type == 'xpath') - { - return self::_xpath_select($html, $selector); - } - elseif ($selector_type == 'regex') - { - return self::_regex_select($html, $selector); - } - elseif ($selector_type == 'css') - { - return self::_css_select($html, $selector); - } - } - - public static function remove($html, $selector, $selector_type = 'xpath') - { - if (empty($html) || empty($selector)) - { - return false; - } - - $remove_html = ""; - $selector_type = strtolower($selector_type); - if ($selector_type == 'xpath') - { - $remove_html = self::_xpath_select($html, $selector, true); - } - elseif ($selector_type == 'regex') - { - $remove_html = self::_regex_select($html, $selector, true); - } - elseif ($selector_type == 'css') - { - $remove_html = self::_css_select($html, $selector, true); - } - $html = str_replace($remove_html, "", $html); - return $html; - } - - /** - * xpath选择器 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-10-26 12:53 - */ - private static function _xpath_select($html, $selector, $remove = false) - { - if (!is_object(self::$dom)) - { - self::$dom = new DOMDocument(); - } - - // 如果加载的不是之前的HTML内容,替换一下验证标识 - if (self::$dom_auth != md5($html)) - { - self::$dom_auth = md5($html); - @self::$dom->loadHTML(''.$html); - self::$xpath = new DOMXpath(self::$dom); - } - - //libxml_use_internal_errors(true); - //self::$dom->loadHTML(''.$html); - //$errors = libxml_get_errors(); - //if (!empty($errors)) - //{ - //print_r($errors); - //exit; - //} - - $elements = @self::$xpath->query($selector); - if ($elements === false) - { - self::$error = "the selector in the xpath(\"{$selector}\") syntax errors"; - // 不应该返回false,因为isset(false)为true,更不能通过 !$values 去判断,因为!0为true,所以这里只能返回null - //return false; - return null; - } - - $result = array(); - if (!is_null($elements)) - { - foreach ($elements as $element) - { - // 如果是删除操作,取一整块代码 - if ($remove) - { - $content = self::$dom->saveXml($element); - } - else - { - $nodeName = $element->nodeName; - $nodeType = $element->nodeType; // 1.Element 2.Attribute 3.Text - //$nodeAttr = $element->getAttribute('src'); - //$nodes = util::node_to_array(self::$dom, $element); - //echo $nodes['@src']."\n"; - // 如果是img标签,直接取src值 - if ($nodeType == 1 && in_array($nodeName, array('img'))) - { - $content = $element->getAttribute('src'); - } - // 如果是标签属性,直接取节点值 - elseif ($nodeType == 2 || $nodeType == 3 || $nodeType == 4) - { - $content = $element->nodeValue; - } - else - { - // 保留nodeValue里的html符号,给children二次提取 - $content = self::$dom->saveXml($element); - //$content = trim(self::$dom->saveHtml($element)); - $content = preg_replace(array("#^<{$nodeName}.*>#isU","#$#isU"), array('', ''), $content); - } - } - $result[] = $content; - } - } - if (empty($result)) - { - return null; - } - // 如果只有一个元素就直接返回string,否则返回数组 - return count($result) > 1 ? $result : $result[0]; - } - - /** - * css选择器 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-10-26 12:53 - */ - private static function _css_select($html, $selector, $remove = false) - { - $selector = self::css_to_xpath($selector); - //echo $selector."\n"; - //exit("\n"); - return self::_xpath_select($html, $selector, $remove); - // 如果加载的不是之前的HTML内容,替换一下验证标识 - //if (self::$dom_auth['css'] != md5($html)) - //{ - //self::$dom_auth['css'] = md5($html); - //phpQuery::loadDocumentHTML($html); - //} - //if ($remove) - //{ - //return phpQuery::pq($selector)->remove(); - //} - //else - //{ - //return phpQuery::pq($selector)->html(); - //} - } - - /** - * 正则选择器 - * - * @param mixed $html - * @param mixed $selector - * @return void - * @author seatle - * @created time :2016-10-26 12:53 - */ - private static function _regex_select($html, $selector, $remove = false) - { - if(@preg_match_all($selector, $html, $out) === false) - { - self::$error = "the selector in the regex(\"{$selector}\") syntax errors"; - return null; - } - $count = count($out); - $result = array(); - // 一个都没有匹配到 - if ($count == 0) - { - return null; - } - // 只匹配一个,就是只有一个 () - elseif ($count == 2) - { - // 删除的话取匹配到的所有内容 - if ($remove) - { - $result = $out[0]; - } - else - { - $result = $out[1]; - } - } - else - { - for ($i = 1; $i < $count; $i++) - { - // 如果只有一个元素,就直接返回好了 - $result[] = count($out[$i]) > 1 ? $out[$i] : $out[$i][0]; - } - } - if (empty($result)) - { - return null; - } - - return count($result) > 1 ? $result : $result[0]; - } - - public static function find_all($html, $selector) - { - } - - - public static function css_to_xpath($selectors) - { - $queries = self::parse_selector($selectors); - $delimiter_before = false; - $xquery = ''; - foreach($queries as $s) - { - // TAG - $is_tag = preg_match('@^[\w|\||-]+$@', $s) || $s == '*'; - if ($is_tag) - { - $xquery .= $s; - } - // ID - else if ($s[0] == '#') - { - if ($delimiter_before) - { - $xquery .= '*'; - } - // ID用精确查询 - $xquery .= "[@id='".substr($s, 1)."']"; - } - // CLASSES - else if ($s[0] == '.') - { - if ($delimiter_before) - { - $xquery .= '*'; - } - // CLASS用模糊查询 - $xquery .= "[contains(@class,'".substr($s, 1)."')]"; - } - // ATTRIBUTES - else if ($s[0] == '[') - { - if ($delimiter_before) - { - $xquery .= '*'; - } - // strip side brackets - $attr = trim($s, ']['); - // attr with specifed value - if (mb_strpos($s, '=')) - { - $value = null; - list($attr, $value) = explode('=', $attr); - $value = trim($value, "'\""); - if (self::is_regexp($attr)) - { - // cut regexp character - $attr = substr($attr, 0, -1); - $xquery .= "[@{$attr}]"; - } - else - { - $xquery .= "[@{$attr}='{$value}']"; - } - } - // attr without specified value - else - { - $xquery .= "[@{$attr}]"; - } - } - // ~ General Sibling Selector - else if ($s[0] == '~') - { - } - // + Adjacent sibling selectors - else if ($s[0] == '+') - { - } - // PSEUDO CLASSES - else if ($s[0] == ':') - { - } - // DIRECT DESCENDANDS - else if ($s == '>') - { - $xquery .= '/'; - $delimiter_before = 2; - } - // ALL DESCENDANDS - else if ($s == ' ') - { - $xquery .= '//'; - $delimiter_before = 2; - } - // ERRORS - else - { - exit("Unrecognized token '$s'"); - } - $delimiter_before = $delimiter_before === 2; - } - return $xquery; - } - - /** - * @access private - */ - public static function parse_selector($query) - { - $query = trim( preg_replace( '@\s+@', ' ', preg_replace('@\s*(>|\\+|~)\s*@', '\\1', $query) ) ); - $queries = array(); - if ( !$query ) - { - return $queries; - } - - $special_chars = array('>',' '); - $special_chars_mapping = array(); - $strlen = mb_strlen($query); - $class_chars = array('.', '-'); - $pseudo_chars = array('-'); - $tag_chars = array('*', '|', '-'); - // split multibyte string - // http://code.google.com/p/phpquery/issues/detail?id=76 - $_query = array(); - for ( $i=0; $i<$strlen; $i++ ) - { - $_query[] = mb_substr($query, $i, 1); - } - $query = $_query; - // it works, but i dont like it... - $i = 0; - while( $i < $strlen ) - { - $c = $query[$i]; - $tmp = ''; - // TAG - if ( self::is_char($c) || in_array($c, $tag_chars) ) - { - while(isset($query[$i]) && (self::is_char($query[$i]) || in_array($query[$i], $tag_chars))) - { - $tmp .= $query[$i]; - $i++; - } - $queries[] = $tmp; - } - // IDs - else if ( $c == '#' ) - { - $i++; - while( isset($query[$i]) && (self::is_char($query[$i]) || $query[$i] == '-') ) - { - $tmp .= $query[$i]; - $i++; - } - $queries[] = '#'.$tmp; - } - // SPECIAL CHARS - else if ( in_array($c, $special_chars) ) - { - $queries[] = $c; - $i++; - // MAPPED SPECIAL MULTICHARS - // } else if ( $c.$query[$i+1] == '//') { - // $return[] = ' '; - // $i = $i+2; - } - // MAPPED SPECIAL CHARS - else if ( isset($special_chars_mapping[$c])) - { - $queries[] = $special_chars_mapping[$c]; - $i++; - } - // COMMA - else if ( $c == ',' ) - { - $i++; - while( isset($query[$i]) && $query[$i] == ' ') - { - $i++; - } - } - // CLASSES - else if ($c == '.') - { - while( isset($query[$i]) && (self::is_char($query[$i]) || in_array($query[$i], $class_chars))) - { - $tmp .= $query[$i]; - $i++; - } - $queries[] = $tmp; - } - // ~ General Sibling Selector - else if ($c == '~') - { - $space_allowed = true; - $tmp .= $query[$i++]; - while( isset($query[$i]) - && (self::is_char($query[$i]) - || in_array($query[$i], $class_chars) - || $query[$i] == '*' - || ($query[$i] == ' ' && $space_allowed) - )) - { - if ($query[$i] != ' ') - { - $space_allowed = false; - } - $tmp .= $query[$i]; - $i++; - } - $queries[] = $tmp; - } - // + Adjacent sibling selectors - else if ($c == '+') - { - $space_allowed = true; - $tmp .= $query[$i++]; - while( isset($query[$i]) - && (self::is_char($query[$i]) - || in_array($query[$i], $class_chars) - || $query[$i] == '*' - || ($space_allowed && $query[$i] == ' ') - )) - { - if ($query[$i] != ' ') - $space_allowed = false; - $tmp .= $query[$i]; - $i++; - } - $queries[] = $tmp; - } - // ATTRS - else if ($c == '[') - { - $stack = 1; - $tmp .= $c; - while( isset($query[++$i])) - { - $tmp .= $query[$i]; - if ( $query[$i] == '[') - { - $stack++; - } - else if ( $query[$i] == ']') - { - $stack--; - if (! $stack ) - { - break; - } - } - } - $queries[] = $tmp; - $i++; - } - // PSEUDO CLASSES - else if ($c == ':') - { - $stack = 1; - $tmp .= $query[$i++]; - while( isset($query[$i]) && (self::is_char($query[$i]) || in_array($query[$i], $pseudo_chars))) - { - $tmp .= $query[$i]; - $i++; - } - // with arguments ? - if ( isset($query[$i]) && $query[$i] == '(') - { - $tmp .= $query[$i]; - $stack = 1; - while( isset($query[++$i])) - { - $tmp .= $query[$i]; - if ( $query[$i] == '(') - { - $stack++; - } - else if ( $query[$i] == ')') - { - $stack--; - if (! $stack ) - { - break; - } - } - } - $queries[] = $tmp; - $i++; - } - else - { - $queries[] = $tmp; - } - } - else - { - $i++; - } - } - - if (isset($queries[0])) - { - if (isset($queries[0][0]) && $queries[0][0] == ':') - { - array_unshift($queries, '*'); - } - if ($queries[0] != '>') - { - array_unshift($queries, ' '); - } - } - - return $queries; - } - - public static function is_char($char) - { - return preg_match('@\w@', $char); - } - - /** - * 模糊匹配 - * ^ 前缀字符串 - * * 包含字符串 - * $ 后缀字符串 - * @access private - */ - protected static function is_regexp($pattern) - { - return in_array( - $pattern[ mb_strlen($pattern)-1 ], - array('^','*','$') - ); - } -} diff --git a/vendor/owner888/phpspider/core/util.php b/vendor/owner888/phpspider/core/util.php deleted file mode 100644 index 6d6f811..0000000 --- a/vendor/owner888/phpspider/core/util.php +++ /dev/null @@ -1,936 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider实用函数集合类文件 -//---------------------------------- - -namespace phpspider\core; -// 引入PATH_DATA -require_once __DIR__ . '/constants.php'; - -class util -{ - /** - * 文件锁 - * 如果没有锁,就加一把锁并且执行逻辑,然后删除锁 - * if (!util::lock('statistics_offer')) - * { - * util::lock('statistics_offer'); - * ... - * util::unlock('statistics_offer'); - * } - * 否则输出锁存在 - * else - * { - * echo "process has been locked\n"; - * } - * - * @param mixed $lock_name - * @param int $lock_timeout - * @return void - * @author seatle - * @created time :2016-02-18 14:28 - */ - public static function lock($lock_name, $lock_timeout = 600) - { - $lock = util::get_file(PATH_DATA."/lock/{$lock_name}.lock"); - if ($lock) - { - $time = time() - $lock; - // 还没到10分钟,说明进程还活着 - if ($time < $lock_timeout) - { - return true; - } - unlink(PATH_DATA."/lock/{$lock_name}.lock"); - } - util::put_file(PATH_DATA."/lock/{$lock_name}.lock", time()); - return false; - } - - public static function unlock($lock_name) - { - unlink(PATH_DATA."/lock/{$lock_name}.lock"); - } - - public static function time2second($time, $is_log = true) - { - if(is_numeric($time)) - { - $value = array( - "years" => 0, "days" => 0, "hours" => 0, - "minutes" => 0, "seconds" => 0, - ); - if($time >= 31556926) - { - $value["years"] = floor($time/31556926); - $time = ($time%31556926); - } - if($time >= 86400) - { - $value["days"] = floor($time/86400); - $time = ($time%86400); - } - if($time >= 3600) - { - $value["hours"] = floor($time/3600); - $time = ($time%3600); - } - if($time >= 60) - { - $value["minutes"] = floor($time/60); - $time = ($time%60); - } - $value["seconds"] = floor($time); - //return (array) $value; - //$t = $value["years"] ."y ". $value["days"] ."d ". $value["hours"] ."h ". $value["minutes"] ."m ".$value["seconds"]."s"; - if ($is_log) - { - $t = $value["days"] ."d ". $value["hours"] ."h ". $value["minutes"] ."m ".$value["seconds"]."s"; - } - else - { - $t = $value["days"] ." days ". $value["hours"] ." hours ". $value["minutes"] ." minutes"; - } - return $t; - - } - else - { - return false; - } - } - - public static function get_days($day_sta, $day_end = true, $range = 86400) - { - if ($day_end === true) $day_end = date('Y-m-d'); - - return array_map(function ($time) { - return date('Y-m-d', $time); - }, range(strtotime($day_sta), strtotime($day_end), $range)); - } - - /** - * 获取文件行数 - * - * @param mixed $filepath - * @return void - * @author seatle - * @created time :2016-03-31 21:54 - */ - public static function get_file_line($filepath) - { - $line = 0 ; - $fp = fopen($filepath , 'r'); - if (!$fp) - { - return 0; - } - //获取文件的一行内容,注意:需要php5才支持该函数; - while( stream_get_line($fp,8192,"\n") ){ - $line++; - } - fclose($fp);//关闭文件 - return $line; - } - - /** - * 获得表数 - * - * @param mixed $table_name 表名 - * @param mixed $item_value 唯一索引 - * @param int $table_num 表数量 - * @return void - * @author seatle - * @created time :2015-10-22 23:25 - */ - public static function get_table_num($item_value, $table_num = 100) - { - //sha1:返回一个40字符长度的16进制数字 - $item_value = sha1(strtolower($item_value)); - //base_convert:进制建转换,下面是把16进制转成10进制,方便做除法运算 - //str_pad:把字符串填充为指定的长度,下面是在左边加0,表数量大于100就3位,否则2位 - $step = $table_num > 100 ? 3 : 2; - $item_value = str_pad(base_convert(substr($item_value, -2), 16, 10) % $table_num, $step, "0", STR_PAD_LEFT); - return $item_value; - } - - /** - * 获得表面 - * - * @param mixed $table_name 表名 - * @param mixed $item_value 唯一索引 - * @param int $table_num 表数量 - * @return void - * @author seatle - * @created time :2015-10-22 23:25 - */ - public static function get_table_name($table_name, $item_value, $table_num = 100) - { - //sha1:返回一个40字符长度的16进制数字 - $item_value = sha1(strtolower($item_value)); - //base_convert:进制建转换,下面是把16进制转成10进制,方便做除法运算 - //str_pad:把字符串填充为指定的长度,下面是在左边加0,共3位 - $step = $table_num > 100 ? 3 : 2; - $item_value = str_pad(base_convert(substr($item_value, -2), 16, 10) % $table_num, $step, "0", STR_PAD_LEFT); - return $table_name."_".$item_value; - } - - // 获得当前使用内存 - public static function memory_get_usage() - { - $memory = memory_get_usage(); - return self::format_bytes($memory); - } - - // 获得最高使用内存 - public static function memory_get_peak_usage() - { - $memory = memory_get_peak_usage(); - return self::format_bytes($memory); - } - - // 转换大小单位 - public static function format_bytes($size) - { - $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb'); - return @round($size / pow(1024, ($i = floor(log($size, 1024)))), 2) . ' ' . $unit[$i]; - } - - /** - * 获取数组大小 - * - * @param mixed $arr 数组 - * @return string - */ - public static function array_size($arr) - { - ob_start(); - print_r($arr); - $mem = ob_get_contents(); - ob_end_clean(); - $mem = preg_replace("/\n +/", "", $mem); - $mem = strlen($mem); - return self::format_bytes($mem); - } - - /** - * 数字随机数 - * - * @param int $num - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function rand_num($num = 7) - { - $rand = ""; - for ($i = 0; $i < $num; $i ++) - { - $rand .= mt_rand(0, 9); - } - return $rand; - } - - /** - * 字母数字混合随机数 - * - * @param int $num - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function rand_str($num = 10) - { - $chars = 'abcdefghijklmnopqrstuvwxyz0123456789'; - $string = ""; - for ($i = 0; $i < $num; $i ++) - { - $string .= substr($chars, rand(0, strlen($chars)), 1); - } - return $string; - } - - /** - * 汉字转拼音 - * - * @param mixed $str 汉字 - * @param int $ishead - * @param int $isclose - * @static - * @access public - * @return string - */ - public static function pinyin($str, $ishead = 0, $isclose = 1) - { - // $str = iconv("utf-8", "gbk//ignore", $str); - $str = mb_convert_encoding($str, "gbk", "utf-8"); - global $pinyins; - $restr = ''; - $str = trim($str); - $slen = strlen($str); - if ($slen < 2) - { - return $str; - } - if (count($pinyins) == 0) - { - $fp = fopen(PATH_DATA . '/pinyin.dat', 'r'); - while (!feof($fp)) - { - $line = trim(fgets($fp)); - $pinyins[$line[0] . $line[1]] = substr($line, 3, strlen($line) - 3); - } - fclose($fp); - } - for ($i = 0; $i < $slen; $i ++) - { - if (ord($str[$i]) > 0x80) - { - $c = $str[$i] . $str[$i + 1]; - $i ++; - if (isset($pinyins[$c])) - { - if ($ishead == 0) - { - $restr .= $pinyins[$c]; - } - else - { - $restr .= $pinyins[$c][0]; - } - } - else - { - // $restr .= "_"; - } - } - else if (preg_match("/[a-z0-9]/i", $str[$i])) - { - $restr .= $str[$i]; - } - else - { - // $restr .= "_"; - } - } - if ($isclose == 0) - { - unset($pinyins); - } - return $restr; - } - - /** - * 生成字母前缀 - * - * @param mixed $s0 - * @return char - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function letter_first($s0) - { - $firstchar_ord = ord(strtoupper($s0{0})); - if (($firstchar_ord >= 65 and $firstchar_ord <= 91) or ($firstchar_ord >= 48 and $firstchar_ord <= 57)) return $s0{0}; - // $s = iconv("utf-8", "gbk//ignore", $s0); - $s = mb_convert_encoding($s0, "gbk", "utf-8"); - $asc = ord($s{0}) * 256 + ord($s{1}) - 65536; - if ($asc >= -20319 and $asc <= -20284) return "A"; - if ($asc >= -20283 and $asc <= -19776) return "B"; - if ($asc >= -19775 and $asc <= -19219) return "C"; - if ($asc >= -19218 and $asc <= -18711) return "D"; - if ($asc >= -18710 and $asc <= -18527) return "E"; - if ($asc >= -18526 and $asc <= -18240) return "F"; - if ($asc >= -18239 and $asc <= -17923) return "G"; - if ($asc >= -17922 and $asc <= -17418) return "H"; - if ($asc >= -17417 and $asc <= -16475) return "J"; - if ($asc >= -16474 and $asc <= -16213) return "K"; - if ($asc >= -16212 and $asc <= -15641) return "L"; - if ($asc >= -15640 and $asc <= -15166) return "M"; - if ($asc >= -15165 and $asc <= -14923) return "N"; - if ($asc >= -14922 and $asc <= -14915) return "O"; - if ($asc >= -14914 and $asc <= -14631) return "P"; - if ($asc >= -14630 and $asc <= -14150) return "Q"; - if ($asc >= -14149 and $asc <= -14091) return "R"; - if ($asc >= -14090 and $asc <= -13319) return "S"; - if ($asc >= -13318 and $asc <= -12839) return "T"; - if ($asc >= -12838 and $asc <= -12557) return "W"; - if ($asc >= -12556 and $asc <= -11848) return "X"; - if ($asc >= -11847 and $asc <= -11056) return "Y"; - if ($asc >= -11055 and $asc <= -10247) return "Z"; - return 0; // null - } - - /** - * 获得某天前的时间戳 - * - * @param mixed $day - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function getxtime($day) - { - $day = intval($day); - return mktime(23, 59, 59, date("m"), date("d") - $day, date("y")); - } - - /** - * 读文件 - */ - public static function get_file($url, $timeout = 10) - { - if (function_exists('curl_init')) - { - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 0); - curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10); - $content = curl_exec($ch); - curl_close($ch); - if ($content) return $content; - } - $ctx = stream_context_create(array('http' => array('timeout' => $timeout))); - $content = @file_get_contents($url, 0, $ctx); - if ($content) return $content; - return false; - } - - /** - * 写文件,如果文件目录不存在,则递归生成 - */ - public static function put_file($file, $content, $flag = 0) - { - $pathinfo = pathinfo($file); - if (!empty($pathinfo['dirname'])) - { - if (file_exists($pathinfo['dirname']) === false) - { - if (@mkdir($pathinfo['dirname'], 0777, true) === false) - { - return false; - } - } - } - if ($flag === FILE_APPEND) - { - // 多个php-fpm写一个文件的时候容易丢失,要加锁 - //return @file_put_contents($file, $content, FILE_APPEND|LOCK_EX); - return @file_put_contents($file, $content, FILE_APPEND); - } - else - { - return @file_put_contents($file, $content, LOCK_EX); - } - } - - /** - * 检查路径是否存在,不存在则递归生成路径 - * - * @param mixed $path 路径 - * @static - * @access public - * @return bool or string - */ - public static function path_exists($path) - { - $pathinfo = pathinfo($path . '/tmp.txt'); - if (!empty($pathinfo['dirname'])) - { - if (file_exists($pathinfo['dirname']) === false) - { - if (mkdir($pathinfo['dirname'], 0777, true) === false) - { - return false; - } - } - } - return $path; - } - - /** - * 递归删除目录 - * - * @param mixed $dir - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function deldir($dir) - { - //先删除目录下的文件: - $dh = opendir($dir); - while ($file = readdir($dh)) - { - if($file!="." && $file!="..") - { - $fullpath = $dir."/".$file; - if(!is_dir($fullpath)) - { - unlink($fullpath); - } - else - { - self::deldir($fullpath); - } - } - } - - closedir($dh); - //删除当前文件夹: - if(rmdir($dir)) - { - return true; - } - else - { - return false; - } - } - - /** - * 递归修改目录权限 - * - * @param mixed $path 目录 - * @param mixed $filemode 权限 - * @return bool - */ - public static function chmodr($path, $filemode) - { - if (!is_dir($path)) - { - return @chmod($path, $filemode); - } - - $dh = opendir($path); - while (($file = readdir($dh)) !== false) - { - if ($file != '.' && $file != '..') - { - $fullpath = $path . '/' . $file; - if (is_link($fullpath)) - { - return FALSE; - } - elseif (!is_dir($fullpath) && !@chmod($fullpath, $filemode)) - { - return FALSE; - } - elseif (!self::chmodr($fullpath, $filemode)) - { - return FALSE; - } - } - } - - closedir($dh); - - if (@chmod($path, $filemode)) - { - return TRUE; - } - else - { - return FALSE; - } - } - - /** - * 数组格式化为CSV - * - * @param mixed $data - * @return void - * @author seatle - * @created time :2016-07-29 11:32 - */ - public static function format_csv($data) - { - foreach ($data as $k=>$v) - { - $v = str_replace(",", "", $v); - $v = str_replace(",", "", $v); - $data[$k] = $v; - } - return implode(",", $data); - } - - /** - * 判断是否为utf8字符串 - * @parem $str - * @return bool - */ - public static function is_utf8($str) - { - if ($str === mb_convert_encoding(mb_convert_encoding($str, "UTF-32", "UTF-8"), "UTF-8", "UTF-32")) - { - return true; - } - else - { - return false; - } - } - - /** - * 获取文件编码 - * @param $string - * @return string - */ - public static function get_encoding($string) - { - $encoding = mb_detect_encoding($string, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5')); - return strtolower($encoding); - } - - /** - * 转换数组值的编码格式 - * @param array $arr - * @param string $toEncoding - * @param string $fromEncoding - * @return array - */ - public static function array_iconv($arr, $from_encoding, $to_encoding) - { - eval('$arr = '.iconv($from_encoding, $to_encoding.'//IGNORE', var_export($arr,TRUE)).';'); - return $arr; - } - - /** - * 从普通时间返回Linux时间截(strtotime中文处理版) - * @parem string $dtime - * @return int - */ - public static function cn_strtotime($dtime) - { - if (!preg_match("/[^0-9]/", $dtime)) - { - return $dtime; - } - $dtime = trim($dtime); - $dt = Array(1970, 1, 1, 0, 0, 0); - $dtime = preg_replace("/[\r\n\t]|日|秒/", " ", $dtime); - $dtime = str_replace("年", "-", $dtime); - $dtime = str_replace("月", "-", $dtime); - $dtime = str_replace("时", ":", $dtime); - $dtime = str_replace("分", ":", $dtime); - $dtime = trim(preg_replace("/[ ]{1,}/", " ", $dtime)); - $ds = explode(" ", $dtime); - $ymd = explode("-", $ds[0]); - if (!isset($ymd[1])) - { - $ymd = explode(".", $ds[0]); - } - if (isset($ymd[0])) - { - $dt[0] = $ymd[0]; - } - if (isset($ymd[1])) $dt[1] = $ymd[1]; - if (isset($ymd[2])) $dt[2] = $ymd[2]; - if (strlen($dt[0]) == 2) $dt[0] = '20' . $dt[0]; - if (isset($ds[1])) - { - $hms = explode(":", $ds[1]); - if (isset($hms[0])) $dt[3] = $hms[0]; - if (isset($hms[1])) $dt[4] = $hms[1]; - if (isset($hms[2])) $dt[5] = $hms[2]; - } - foreach ($dt as $k => $v) - { - $v = preg_replace("/^0{1,}/", '', trim($v)); - if ($v == '') - { - $dt[$k] = 0; - } - } - $mt = mktime($dt[3], $dt[4], $dt[5], $dt[1], $dt[2], $dt[0]); - if (!empty($mt)) - { - return $mt; - } - else - { - return strtotime($dtime); - } - } - - public static function cn_substr($string, $length = 80, $etc = '...', $count_words = true) - { - mb_internal_encoding("UTF-8"); - if ($length == 0) return ''; - if (strlen($string) <= $length) return $string; - preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info); - if ($count_words) - { - $j = 0; - $wordscut = ""; - for ($i = 0; $i < count($info[0]); $i ++) - { - $wordscut .= $info[0][$i]; - if (ord($info[0][$i]) >= 128) - { - $j = $j + 2; - } - else - { - $j = $j + 1; - } - if ($j >= $length) - { - return $wordscut . $etc; - } - } - return join('', $info[0]); - } - return join("", array_slice($info[0], 0, $length)) . $etc; - } - - /** - * 获取文件后缀名 - * - * @param mixed $file_name 文件名 - * @static - * - * @access public - * @return string - */ - public static function get_extension($file_name) - { - $ext = explode('.', $file_name); - $ext = array_pop($ext); - return strtolower($ext); - } - - // 获取 Url 跳转后的真实地址 - public static function getrealurl($url) - { - if (empty($url)) - { - return $url; - } - $header = get_headers($url, 1); - if (empty($header[0]) || empty($header[1])) - { - return $url; - } - if (strpos($header[0], '301') || strpos($header[0], '302')) - { - if (empty($header['Location'])) - { - return $url; - } - if (is_array($header['Location'])) - { - return $header['Location'][count($header['Location']) - 1]; - } - else - { - return $header['Location']; - } - } - else - { - return $url; - } - } - - // 解压服务器用 Content-Encoding:gzip 压缩过的数据 - public static function gzdecode($data) - { - $flags = ord(substr($data, 3, 1)); - $headerlen = 10; - $extralen = 0; - $filenamelen = 0; - if ($flags & 4) - { - $extralen = unpack('v', substr($data, 10, 2)); - $extralen = $extralen[1]; - $headerlen += 2 + $extralen; - } - if ($flags & 8) // Filename - $headerlen = strpos($data, chr(0), $headerlen) + 1; - if ($flags & 16) // Comment - $headerlen = strpos($data, chr(0), $headerlen) + 1; - if ($flags & 2) // CRC at end of file - $headerlen += 2; - $unpacked = @gzinflate(substr($data, $headerlen)); - if ($unpacked === FALSE) $unpacked = $data; - return $unpacked; - } - - /** - * 数字金额转换为中文 - * @param string|integer|float $num 目标数字 - * @param boolean $sim 使用小写(默认) - * @return string - */ - public static function number2chinese($num, $sim = FALSE) - { - if (!is_numeric($num)) return '含有非数字非小数点字符!'; - $char = $sim ? array('零', '一', '二', '三', '四', '五', '六', '七', '八', '九') : array('零', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖'); - $unit = $sim ? array('', '十', '百', '千', '', '万', '亿', '兆') : array('', '拾', '佰', '仟', '', '萬', '億', '兆'); - $retval = ''; - - $num = sprintf("%01.2f", $num); - - list ($num, $dec) = explode('.', $num); - - // 小数部分 - if ($dec['0'] > 0) - { - $retval .= "{$char[$dec['0']]}角"; - } - if ($dec['1'] > 0) - { - $retval .= "{$char[$dec['1']]}分"; - } - - // 整数部分 - if ($num > 0) - { - $retval = "元" . $retval; - $f = 1; - $str = strrev(intval($num)); - for ($i = 0, $c = strlen($str); $i < $c; $i ++) - { - if ($str[$i] > 0) - { - $f = 0; - } - if ($f == 1 && $str[$i] == 0) - { - $out[$i] = ""; - } - else - { - $out[$i] = $char[$str[$i]]; - } - $out[$i] .= $str[$i] != '0' ? $unit[$i % 4] : ''; - if ($i > 1 and $str[$i] + $str[$i - 1] == 0) - { - $out[$i] = ''; - } - if ($i % 4 == 0) - { - $out[$i] .= $unit[4 + floor($i / 4)]; - } - } - $retval = join('', array_reverse($out)) . $retval; - } - return $retval; - } - - public static function colorize($str, $status = "info") - { - $out = ""; - switch ($status) - { - case 'succ': - $out = "\033[32m"; // Blue - break; - case "error": - $out = "\033[31m"; // Red - break; - case "warn": - $out = "\033[33m"; // Yellow - break; - case "note": - $out = "\033[34m"; // Green - break; - case "debug": - $out = "\033[36m"; // Green - break; - default: - $out = "\033[0m"; // info - break; - } - return $out.$str."\033[0m"; - } - - public static function node_to_array($dom, $node) - { - if(!is_a( $dom, 'DOMDocument' ) || !is_a( $node, 'DOMNode' )) - { - return false; - } - - $array = array(); - // Discard empty nodes - $localName = trim( $node->localName ); - if( empty($localName)) - { - return false; - } - if( XML_TEXT_NODE == $node->nodeType ) - { - return $node->nodeValue; - } - foreach ($node->attributes as $attr) - { - $array['@'.$attr->localName] = $attr->nodeValue; - } - foreach ($node->childNodes as $childNode) - { - if ( (isset($childNode->childNodes->length) && 1 == $childNode->childNodes->length) && - XML_TEXT_NODE == $childNode->firstChild->nodeType ) - { - $array[$childNode->localName] = $childNode->nodeValue; - } - else - { - if( false !== ($a = self::node_to_array( $dom, $childNode))) - { - $array[$childNode->localName] = $a; - } - } - } - return $array; - } - - public static function is_win() - { - return strtoupper(substr(PHP_OS,0,3))==="WIN"; - } - - /** - * 和 http_build_query 相反,分解出参数 - * - * @return void - * @author seatle - * @created time :2016-05-16 17:29 - */ - public static function http_split_query($query, $is_query = false) - { - if (!$is_query) - { - $parse_arr = parse_url($query); - if (empty($parse_arr['query'])) - { - return array(); - } - $query = $parse_arr['query']; - } - - $query_arr = explode("&", $query); - $params = array(); - foreach ($query_arr as $val) - { - $arr = explode("=", $val); - $params[$arr[0]] = $arr[1]; - } - return $params; - } -} - - diff --git a/vendor/owner888/phpspider/core/worker.php b/vendor/owner888/phpspider/core/worker.php deleted file mode 100644 index b09955f..0000000 --- a/vendor/owner888/phpspider/core/worker.php +++ /dev/null @@ -1,421 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// Worker多进程操作类 -//---------------------------------- - -class worker -{ - // worker进程数 - public $count = 0; - // worker id,worker进程从1开始,0被master进程所使用 - public $worker_id = 0; - // worker 进程ID - public $worker_pid = 0; - // 进程用户 - public $user = ''; - // 进程名 - public $title = ''; - // 每个进程是否只运行一次 - public $run_once = true; - // 是否输出日志 - public $log_show = false; - // master进程启动回调 - public $on_start = false; - // master进程停止回调 - public $on_stop = false; - // worker进程启动回调 - public $on_worker_start = false; - // worker进程停止回调 - public $on_worker_stop = false; - // master进程ID - protected static $_master_pid = 0; - // worker进程ID - protected static $_worker_pids = array(); - // master、worker进程启动时间 - public $time_start = 0; - // master、worker进程运行状态 [starting|running|shutdown|reload] - protected static $_status = "starting"; - - - public function __construct() - { - self::$_master_pid = posix_getpid(); - // 产生时钟云,添加后父进程才可以收到信号 - declare(ticks = 1); - $this->install_signal(); - } - - /** - * 安装信号处理函数 - * @return void - */ - protected function install_signal() - { - // stop - pcntl_signal(SIGINT, array($this, 'signal_handler'), false); - // reload - pcntl_signal(SIGUSR1, array($this, 'signal_handler'), false); - // status - pcntl_signal(SIGUSR2, array($this, 'signal_handler'), false); - // ignore - pcntl_signal(SIGPIPE, SIG_IGN, false); - // install signal handler for dead kids - // pcntl_signal(SIGCHLD, array($this, 'signal_handler')); - } - - /** - * 卸载信号处理函数 - * @return void - */ - protected function uninstall_signal() - { - // uninstall stop signal handler - pcntl_signal(SIGINT, SIG_IGN, false); - // uninstall reload signal handler - pcntl_signal(SIGUSR1, SIG_IGN, false); - // uninstall status signal handler - pcntl_signal(SIGUSR2, SIG_IGN, false); - } - - /** - * 信号处理函数,会被其他类调用到,所以要设置为public - * @param int $signal - */ - public function signal_handler($signal) { - switch ($signal) { - // stop 2 - case SIGINT: - // master进程和worker进程都会调用 - $this->stop_all(); - break; - // reload 30 - case SIGUSR1: - echo "reload\n"; - break; - // show status 31 - case SIGUSR2: - echo "status\n"; - break; - } - } - - /** - * 运行worker实例 - */ - public function run() - { - $this->time_start = microtime(true); - $this->worker_id = 0; - $this->worker_pid = posix_getpid(); - $this->set_process_title($this->title); - - // 这里赋值,worker进程也会克隆到 - if ($this->log_show) - { - log::$log_show = true; - } - - if ($this->on_start) - { - call_user_func($this->on_start, $this); - } - - // worker进程从1开始,0被master进程所使用 - for ($i = 1; $i <= $this->count; $i++) - { - $this->fork_one_worker($i); - } - $this->monitor_workers(); - } - - /** - * 创建一个子进程 - * @param Worker $worker - * @throws Exception - */ - public function fork_one_worker($worker_id) - { - //$sockets = stream_socket_pair(STREAM_PF_UNIX, STREAM_SOCK_STREAM, STREAM_IPPROTO_IP); - $pid = pcntl_fork(); - - // 主进程记录子进程pid - if($pid > 0) - { - self::$_worker_pids[$worker_id] = $pid; - } - // 子进程运行 - elseif(0 === $pid) - { - $this->time_start = microtime(true); - $this->worker_id = $worker_id; - $this->worker_pid = posix_getpid(); - $this->set_process_title($this->title); - $this->set_process_user($this->user); - // 清空master进程克隆过来的worker进程ID - self::$_worker_pids = array(); - //$this->uninstall_signal(); - - // 设置worker进程的运行状态为运行中 - self::$_status = "running"; - - // 注册进程退出回调,用来检查是否有错误(子进程里面注册) - register_shutdown_function(array($this, 'check_errors')); - - // 如果设置了worker进程启动回调函数 - if ($this->on_worker_start) - { - call_user_func($this->on_worker_start, $this); - } - - // 停止当前worker实例 - $this->stop(); - // 这里用0表示正常退出 - exit(0); - } - else - { - log::add("fork one worker fail", "Error"); - exit; - } - } - - /** - * 尝试设置运行当前进程的用户 - * - * @param $user_name - */ - protected static function set_process_user($user_name) - { - // 用户名为空 或者 当前用户不是root用户 - if(empty($user_name) || posix_getuid() !== 0) - { - return; - } - $user_info = posix_getpwnam($user_name); - if($user_info['uid'] != posix_getuid() || $user_info['gid'] != posix_getgid()) - { - if(!posix_setgid($user_info['gid']) || !posix_setuid($user_info['uid'])) - { - log::add('Can not run woker as '.$user_name." , You shuld be root", "Error"); - } - } - } - - /** - * 设置当前进程的名称,在ps aux命令中有用 - * 注意 需要php>=5.5或者安装了protitle扩展 - * @param string $title - * @return void - */ - protected function set_process_title($title) - { - if (!empty($title)) - { - // 需要扩展 - if(extension_loaded('proctitle') && function_exists('setproctitle')) - { - @setproctitle($title); - } - // >=php 5.5 - elseif (function_exists('cli_set_process_title')) - { - cli_set_process_title($title); - } - } - } - - /** - * 监控所有子进程的退出事件及退出码 - * @return void - */ - public function monitor_workers() - { - // 设置master进程的运行状态为运行中 - self::$_status = "running"; - while(1) - { - // pcntl_signal_dispatch 子进程无法接受到信号 - // 如果有信号到来,尝试触发信号处理函数 - //pcntl_signal_dispatch(); - // 挂起进程,直到有子进程退出或者被信号打断 - $status = 0; - $pid = pcntl_wait($status, WUNTRACED); - // 如果有信号到来,尝试触发信号处理函数 - //pcntl_signal_dispatch(); - - // 子进程退出信号 - if($pid > 0) - { - //echo "worker[".$pid."] stop\n"; - //$this->stop(); - - // 如果不是正常退出,是被kill等杀掉的 - if($status !== 0) - { - log::add("worker {$pid} exit with status $status", "Warning"); - } - - // key 和 value 互换 - $worker_pids = array_flip(self::$_worker_pids); - // 通过 pid 得到 worker_id - $worker_id = $worker_pids[$pid]; - // 这里不unset掉,是为了进程重启 - self::$_worker_pids[$worker_id] = 0; - //unset(self::$_worker_pids[$pid]); - - // 再生成一个worker - if (!$this->run_once) - { - $this->fork_one_worker($worker_id); - } - - // 如果所有子进程都退出了,触发主进程退出函数 - $all_worker_stop = true; - foreach (self::$_worker_pids as $_worker_pid) - { - // 只要有一个worker进程还存在进程ID,就不算退出 - if ($_worker_pid != 0) - { - $all_worker_stop = false; - } - } - if ($all_worker_stop) - { - if ($this->on_stop) - { - call_user_func($this->on_stop, $this); - } - exit(0); - } - } - // 其他信号 - else - { - // worker进程接受到master进行信号退出的,会到这里来 - if ($this->on_stop) - { - call_user_func($this->on_stop, $this); - } - exit(0); - } - } - } - - /** - * 执行关闭流程(所有进程) - * 事件触发,非正常程序执行完毕 - * @return void - */ - public function stop_all() - { - // 设置master、worker进程的运行状态为关闭状态 - self::$_status = "shutdown"; - // master进程 - if(self::$_master_pid === posix_getpid()) - { - // 循环给worker进程发送关闭信号 - foreach (self::$_worker_pids as $worker_pid) - { - posix_kill($worker_pid, SIGINT); - } - } - // worker进程 - else - { - // 接收到master进程发送的关闭信号之后退出,这里应该考虑业务的完整性,不能强行exit - $this->stop(); - exit(0); - } - } - - /** - * 停止当前worker实例 - * 正常运行结束和接受信号退出,都会调用这个方法 - * @return void - */ - public function stop() - { - if ($this->on_worker_stop) - { - call_user_func($this->on_worker_stop, $this); - } - // 设置worker进程的运行状态为关闭 - self::$_status = "shutdown"; - } - - /** - * 检查错误,PHP exit之前会执行 - * @return void - */ - public function check_errors() - { - // 如果当前worker进程不是正常退出 - if(self::$_status != "shutdown") - { - $error_msg = "WORKER EXIT UNEXPECTED "; - $errors = error_get_last(); - if($errors && ($errors['type'] === E_ERROR || - $errors['type'] === E_PARSE || - $errors['type'] === E_CORE_ERROR || - $errors['type'] === E_COMPILE_ERROR || - $errors['type'] === E_RECOVERABLE_ERROR )) - { - $error_msg .= $this->get_error_type($errors['type']) . " {$errors['message']} in {$errors['file']} on line {$errors['line']}"; - } - log::add($error_msg, 'Error'); - } - } - - /** - * 获取错误类型对应的意义 - * @param integer $type - * @return string - */ - protected function get_error_type($type) - { - switch($type) - { - case E_ERROR: // 1 // - return 'E_ERROR'; - case E_WARNING: // 2 // - return 'E_WARNING'; - case E_PARSE: // 4 // - return 'E_PARSE'; - case E_NOTICE: // 8 // - return 'E_NOTICE'; - case E_CORE_ERROR: // 16 // - return 'E_CORE_ERROR'; - case E_CORE_WARNING: // 32 // - return 'E_CORE_WARNING'; - case E_COMPILE_ERROR: // 64 // - return 'E_COMPILE_ERROR'; - case E_COMPILE_WARNING: // 128 // - return 'E_COMPILE_WARNING'; - case E_USER_ERROR: // 256 // - return 'E_USER_ERROR'; - case E_USER_WARNING: // 512 // - return 'E_USER_WARNING'; - case E_USER_NOTICE: // 1024 // - return 'E_USER_NOTICE'; - case E_STRICT: // 2048 // - return 'E_STRICT'; - case E_RECOVERABLE_ERROR: // 4096 // - return 'E_RECOVERABLE_ERROR'; - case E_DEPRECATED: // 8192 // - return 'E_DEPRECATED'; - case E_USER_DEPRECATED: // 16384 // - return 'E_USER_DEPRECATED'; - } - return ""; - } -} diff --git a/vendor/owner888/phpspider/gitadd.sh b/vendor/owner888/phpspider/gitadd.sh deleted file mode 100644 index 577e558..0000000 --- a/vendor/owner888/phpspider/gitadd.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -if [ ! -d "$1" ] && [ ! -f "$1" ]; then - echo "file $1 not exists" - exit -fi -filename=$1 - -comment="add file" -if [[ $2 != "" ]]; then - comment=$2 -fi - -echo "start update..." -git pull -echo "start add new file..." -git add $filename -echo "start commit..." -git commit -m "$comment" $filename -git push -u origin master -echo "git commit complete..." diff --git a/vendor/owner888/phpspider/hacked-emails/banners.txt b/vendor/owner888/phpspider/hacked-emails/banners.txt deleted file mode 100644 index 5248192..0000000 --- a/vendor/owner888/phpspider/hacked-emails/banners.txt +++ /dev/null @@ -1,129 +0,0 @@ - - _-o#&&*''''?d:>b\_ - _o/"`'' '',, dMF9MMMMMHo_ - .o&#' `"MbHMMMMMMMMMMMHo. - .o"" ' vodM*$&&HMMMMMMMMMM?. - ,' $M&ood,~'`(&##MMMMMMH\ - / ,MMMMMMM#b?#bobMMMMHMMML - & ?MMMMMMMMMMMMMMMMM7MMM$R*Hk - ?$. :MMMMMMMMMMMMMMMMMMM/HMMM|`*L -| |MMMMMMMMMMMMMMMMMMMMbMH' T, -$H#: `*MMMMMMMMMMMMMMMMMMMMb#]' `? -]MMH# ""*""""*#MMMMMMMMMMMMM' - -MMMMMb_ |MMMMMMMMMMMP' : -HMMMMMMMHo `MMMMMMMMMT . -?MMMMMMMMP 9MMMMMMMM] - --?MMMMMMM |MMMMMMMMM?,d- ' {Name} - :|MMMMMM- `MMMMMMMT .M|. : {Description} - .9MMM[ &MMMMM*' `' . {Loaded} - :9MMk `MMM#" - - &M] ` .- - `&. . - `~, . ./ - . _ .- - '`--._,dd###pp=""' - -$$$$$AnyShIt$$$$$$ - - _v->#H#P? "':o<>\_ - .,dP` `'' "'-o.+H6&MMMHo_ - oHMH9' `?&bHMHMMMMMMHo. - oMP"' ' ooMP*#&HMMMMMMM?. - ,M* - `*MSdob//`^&##MMMH\ - d*' .,MMMMMMH#o>#ooMMMMMb - HM- :HMMMMMMMMMMMMMMM&HM[R\ - d"Z\. 9MMMMMMMMMMMMMMMMM[HMM|: --H - MMMMMMMMMMMMMMMMMMMbMP' : -:??Mb# `9MMMMMMMMMMMMMMMMMMH#! . -: MMMMH#, "*""""`#HMMMMMMMMMMH - -||MMMMMM6\. [MMMMMMMMMH' : -:|MMMMMMMMMMHo `9MMMMMMMM' . -. HMMMMMMMMMMP' !MMMMMMMM ` -- `#MMMMMMMMM HMMMMMMM*,/ : - : ?MMMMMMMF HMMMMMM',P' : {Name} - . HMMMMR' [MMMMP' ^' - {Description} - : `HMMMT iMMH' .' {Loaded} - -.`HMH . - -:*H . ' - -`\,, . .- - ' . _ .-` - '`~\.__,obb#q==~''' - -$$$$$AnyShIt$$$$$$ - - _ood>H&H&Z?#M#b-\. - .\HMMMMMR?`\M6b."`' ''``v. - .. .MMMMMMMMMMHMMM#&. ``~o. - . ,HMMMMMMMMMMMM*"'-` &b. - . .MMMMMMMMMMMMH' `"&\ - - RMMMMM#H##R' 4Mb - - |7MMM' ?:: `|MMb - / HMM__#|`"\>?v.. `MMML -. `"'#Hd| ` 9MMM: -- |\,\?HH#bbL `9MMb -: !MMMMMMMH#b, `""T -. . ,MMMMMMMMMMMbo. | -: 4MMMMMMMMMMMMMMMHo | -: ?MMMMMMMMMMMMMMM? : --. `#MMMMMMMMMMMM: .- - : |MMMMMMMMMM? . - - JMMMMMMMT' : {Name} - `. MMMMMMH' - {Description} - -. |MMM#*` - {Loaded} - . HMH' . ' - -. #H:. .- - ` . .\ .- - '-..-+oodHL_,--/-` - - -$$$$$AnyShIt$$$$$$ - - .,:,#&6dHHHb&##o\_ - .oHHMMMMMMMMMMMMMMMMMH*\,. - oHMMMMMMMMMMMMMMMMMMMMMMHb:'-. - .dMMMMMMMMMMMMMMMMMMMMMMMMMH|\/' . - ,&HMMMMMMMMMMMMMMMMMMMMMMM/"&.,d. -. - dboMMHMMMMMMMMMMMMMMMMMMMMMML `' . - HMHMMM$Z***MMMMMMMMMMMMMMMMMM|.- . - dMM]MMMM#' `9MMMH?"`MMMMR'T' _ : -|MMMbM#'' |MM" ``MMMH. <_ . -dMMMM#& *&. .?`*" .'&: . -MMMMMH- `' -v/H .dD "' ' : -MMMM* `*M: 4MM*::-!v,_ : -MMMM `*?::" "'``"?9Mb::. : -&MMM, `"'"'|"._ "?`| - : -`MMM].H ,#dM[_H ..: - 9MMi`M: . .ooHMMMMMMM, .. - 9Mb `- 1MMMMMMMMMM| : {Name} - ?M |MM#*#MMMM* . {Description} - -. ` |#"' ,' {Loaded} - . -" v` - -. .- - - . . ` - '-*#d#HHMMMMHH#"-' - -$$$$$AnyShIt$$$$$$ - - .-:?,Z?:&$dHH##b\_ - ,:bqRMMMMMMMMMMMMMMMMMHo. - .?HHHMMMMMMMMMMMMMMMMMMMMMMMHo. - -o/*M9MMMMMMMMMMMMMMMMMMMMMMMMMMMv - .:H\b\'|?#HHMMMMMMMMMMMMMMMMMMMMMM6?Z\ - .?MMMHbdbbodMMMMHMMMMMMMMMMMMMMMMMMMM\': - :MMMMMMMMMMM7MMMMb?6P**#MMMMMMMMMMMMMMM_ : - \MMMMMMMMMMMMb^MMMMMM? `*MMMM*"`MMMR<' . - -.1MMMMMMMMMMMMMb]M#"" 9MR' `?MMb \. : --MMMMMMMMMMMMMMMH##|` *&. |`*' .\ . --?""*MMMMMMMMMMMMM' ' |?b ,]" : -: MMMMMMMMMMH' `M_|M]r\? -. `MMMMMMMMM' `$_:`'"H -- TMMMMMMMM, '"``:: -: [MMMMMMMM| oH| .#M- - : `9MMMMMM' .MP . ,oMMT - . HMMMMP' `' ,MMMP {Name} - - `MMH' HH9* {Description} - '. ` ` .' {Loaded} - - . ' - ` . - .- - ` . .- - ' -==pHMMH##HH#""" diff --git a/vendor/owner888/phpspider/hacked-emails/hacked_emails.php b/vendor/owner888/phpspider/hacked-emails/hacked_emails.php deleted file mode 100644 index 11771d7..0000000 --- a/vendor/owner888/phpspider/hacked-emails/hacked_emails.php +++ /dev/null @@ -1,49 +0,0 @@ - - * @copyright seatle - * @link http://www.epooll.com/ - * @license http://www.opensource.org/licenses/mit-license.php MIT License - */ - -class cls_curl -{ - protected static $timeout = 10; - protected static $ch = null; - protected static $useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36'; - protected static $http_raw = false; - protected static $cookie = null; - protected static $cookie_jar = null; - protected static $cookie_file = null; - protected static $referer = null; - protected static $ip = null; - protected static $proxy = null; - protected static $headers = array(); - protected static $hosts = array(); - protected static $gzip = false; - protected static $info = array(); - - /** - * set timeout - * - * @param init $timeout - * @return - */ - public static function set_timeout($timeout) - { - self::$timeout = $timeout; - } - - /** - * 设置代理 - * - * @param mixed $proxy - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function set_proxy($proxy) - { - self::$proxy = $proxy; - } - - /** - * set referer - * - */ - public static function set_referer($referer) - { - self::$referer = $referer; - } - - /** - * 设置 user_agent - * - * @param string $useragent - * @return void - */ - public static function set_useragent($useragent) - { - self::$useragent = $useragent; - } - - /** - * 设置COOKIE - * - * @param string $cookie - * @return void - */ - public static function set_cookie($cookie) - { - self::$cookie = $cookie; - } - - /** - * 设置COOKIE JAR - * - * @param string $cookie_jar - * @return void - */ - public static function set_cookie_jar($cookie_jar) - { - self::$cookie_jar = $cookie_jar; - } - - /** - * 设置COOKIE FILE - * - * @param string $cookie_file - * @return void - */ - public static function set_cookie_file($cookie_file) - { - self::$cookie_file = $cookie_file; - } - - /** - * 获取内容的时候是不是连header也一起获取 - * - * @param mixed $http_raw - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public static function set_http_raw($http_raw) - { - self::$http_raw = $http_raw; - } - - /** - * 设置IP - * - * @param string $ip - * @return void - */ - public static function set_ip($ip) - { - self::$ip = $ip; - } - - /** - * 设置Headers - * - * @param string $headers - * @return void - */ - public static function set_headers($headers) - { - self::$headers = $headers; - } - - /** - * 设置Hosts - * - * @param string $hosts - * @return void - */ - public static function set_hosts($hosts) - { - self::$hosts = $hosts; - } - - /** - * 设置Gzip - * - * @param string $hosts - * @return void - */ - public static function set_gzip($gzip) - { - self::$gzip = $gzip; - } - - /** - * 初始化 CURL - * - */ - public static function init() - { - //if (empty ( self::$ch )) - if (!is_resource ( self::$ch )) - { - self::$ch = curl_init (); - curl_setopt( self::$ch, CURLOPT_RETURNTRANSFER, true ); - curl_setopt( self::$ch, CURLOPT_CONNECTTIMEOUT, self::$timeout ); - curl_setopt( self::$ch, CURLOPT_HEADER, false ); - curl_setopt( self::$ch, CURLOPT_USERAGENT, self::$useragent ); - curl_setopt( self::$ch, CURLOPT_TIMEOUT, self::$timeout + 5); - // 在多线程处理场景下使用超时选项时,会忽略signals对应的处理函数,但是无耐的是还有小概率的crash情况发生 - curl_setopt( self::$ch, CURLOPT_NOSIGNAL, true); - } - return self::$ch; - } - - /** - * get - * - * - */ - public static function get($url, $fields = array()) - { - self::init (); - return self::http_request($url, 'get', $fields); - } - - /** - * $fields 有三种类型:1、数组;2、http query;3、json - * 1、array('name'=>'yangzetao') 2、http_build_query(array('name'=>'yangzetao')) 3、json_encode(array('name'=>'yangzetao')) - * 前两种是普通的post,可以用$_POST方式获取 - * 第三种是post stream( json rpc,其实就是webservice ),虽然是post方式,但是只能用流方式 http://input 后者 $HTTP_RAW_POST_DATA 获取 - * - * @param mixed $url - * @param array $fields - * @param mixed $proxy - * @static - * @access public - * @return void - */ - public static function post($url, $fields = array()) - { - self::init (); - return self::http_request($url, 'post', $fields); - } - - public static function http_request($url, $type = 'get', $fields) - { - // 如果是 get 方式,直接拼凑一个 url 出来 - if (strtolower($type) == 'get' && !empty($fields)) - { - $url = $url . (strpos($url,"?")===false ? "?" : "&") . http_build_query($fields); - } - - // 随机绑定 hosts,做负载均衡 - if (self::$hosts) - { - $parse_url = parse_url($url); - $host = $parse_url['host']; - $key = rand(0, count(self::$hosts)-1); - $ip = self::$hosts[$key]; - $url = str_replace($host, $ip, $url); - self::$headers = array_merge( array('Host:'.$host), self::$headers ); - } - curl_setopt( self::$ch, CURLOPT_URL, $url ); - // 如果是 post 方式 - if (strtolower($type) == 'post') - { - curl_setopt( self::$ch, CURLOPT_POST, true ); - curl_setopt( self::$ch, CURLOPT_POSTFIELDS, $fields ); - } - if (self::$useragent) - { - curl_setopt( self::$ch, CURLOPT_USERAGENT, self::$useragent ); - } - if (self::$cookie) - { - curl_setopt( self::$ch, CURLOPT_COOKIE, self::$cookie ); - } - if (self::$cookie_jar) - { - curl_setopt( self::$ch, CURLOPT_COOKIEJAR, self::$cookie_jar ); - } - if (self::$cookie_file) - { - curl_setopt( self::$ch, CURLOPT_COOKIEFILE, self::$cookie_file ); - } - if (self::$referer) - { - curl_setopt( self::$ch, CURLOPT_REFERER, self::$referer ); - } - if (self::$ip) - { - self::$headers = array_merge( array('CLIENT-IP:'.self::$ip, 'X-FORWARDED-FOR:'.self::$ip), self::$headers ); - } - if (self::$headers) - { - curl_setopt( self::$ch, CURLOPT_HTTPHEADER, self::$headers ); - } - if (self::$gzip) - { - curl_setopt( self::$ch, CURLOPT_ENCODING, 'gzip' ); - } - if (self::$proxy) - { - curl_setopt( self::$ch, CURLOPT_PROXY, self::$proxy ); - } - if (self::$http_raw) - { - curl_setopt( self::$ch, CURLOPT_HEADER, true ); - } - - $data = curl_exec ( self::$ch ); - self::$info = curl_getinfo(self::$ch); - if ($data === false) - { - //echo date("Y-m-d H:i:s"), ' Curl error: ' . curl_error( self::$ch ), "\n"; - } - - // 关闭句柄 - curl_close( self::$ch ); - //$data = substr($data, 10); - //$data = gzinflate($data); - return $data; - } - - public static function get_info() - { - return self::$info; - } - - public static function get_http_code() - { - return self::$info['http_code']; - } -} - -function classic_curl($urls, $delay) -{ - $queue = curl_multi_init(); - $map = array(); - - foreach ($urls as $url) - { - // create cURL resources - $ch = curl_init(); - - // 设置 URL 和 其他参数 - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_TIMEOUT, 1); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($ch, CURLOPT_HEADER, 0); - curl_setopt($ch, CURLOPT_NOSIGNAL, true); - - // 把当前 curl resources 加入到 curl_multi_init 队列 - curl_multi_add_handle($queue, $ch); - $map[$url] = $ch; - } - - $active = null; - - // execute the handles - do { - $mrc = curl_multi_exec($queue, $active); - } while ($mrc == CURLM_CALL_MULTI_PERFORM); - - while ($active > 0 && $mrc == CURLM_OK) { - while (curl_multi_exec($queue, $active) === CURLM_CALL_MULTI_PERFORM); - // 这里 curl_multi_select 一直返回 -1,所以这里就死循环了,CPU就100%了 - if (curl_multi_select($queue, 0.5) != -1) - { - do { - $mrc = curl_multi_exec($queue, $active); - } while ($mrc == CURLM_CALL_MULTI_PERFORM); - } - } - - $responses = array(); - foreach ($map as $url=>$ch) { - //$responses[$url] = callback(curl_multi_getcontent($ch), $delay); - $responses[$url] = callback(curl_multi_getcontent($ch), $delay, $url); - curl_multi_remove_handle($queue, $ch); - curl_close($ch); - } - - curl_multi_close($queue); - return $responses; -} - -function rolling_curl($urls, $delay) -{ - $queue = curl_multi_init(); - $map = array(); - - foreach ($urls as $url) { - $ch = curl_init(); - - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_TIMEOUT, 10); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($ch, CURLOPT_HEADER, 0); - curl_setopt($ch, CURLOPT_NOSIGNAL, true); - $cookie = '_za=36643642-e546-4d60-a771-8af8dcfbd001; q_c1=a57a2b9f10964f909b8d8969febf3ab2|1437705596000|1437705596000; _xsrf=f0304fba4e44e1d008ec308d59bab029; cap_id="YWY1YmRmODlmZGVmNDc3MWJlZGFkZDg3M2E0M2Q5YjM=|1437705596|963518c454bb6f10d96775021c098c84e1e46f5a"; z_c0="QUFCQVgtRWZBQUFYQUFBQVlRSlZUVjR6NEZVUTgtRkdjTVc5UDMwZXRJZFdWZ2JaOWctNVhnPT0=|1438164574|aed6ef3707f246a7b64da4f1e8c089395d77ff2b"; __utma=51854390.1105113342.1437990174.1438160686.1438164116.10; __utmc=51854390; __utmz=51854390.1438134939.8.5.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/people/yangzetao; __utmv=51854390.100-1|2=registration_date=20131030=1^3=entry_date=20131030=1'; - curl_setopt($ch, CURLOPT_COOKIE, $cookie); - $useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36'; - curl_setopt( $ch, CURLOPT_USERAGENT, $useragent ); - curl_setopt($ch, CURLOPT_ENCODING, 'gzip'); - - curl_multi_add_handle($queue, $ch); - $map[(string) $ch] = $url; - } - - $responses = array(); - do { - while (($code = curl_multi_exec($queue, $active)) == CURLM_CALL_MULTI_PERFORM) ; - - if ($code != CURLM_OK) { break; } - - // a request was just completed -- find out which one - while ($done = curl_multi_info_read($queue)) { - - // get the info and content returned on the request - $info = curl_getinfo($done['handle']); - $error = curl_error($done['handle']); - $results = callback(curl_multi_getcontent($done['handle']), $delay, $map[(string) $done['handle']]); - $responses[$map[(string) $done['handle']]] = compact('info', 'error', 'results'); - - // remove the curl handle that just completed - curl_multi_remove_handle($queue, $done['handle']); - curl_close($done['handle']); - } - - // Block for data in / output; error handling is done by curl_multi_exec - if ($active > 0) { - curl_multi_select($queue, 0.5); - } - - } while ($active); - - curl_multi_close($queue); - return $responses; -} - -function callback($data, $delay, $url) { - //echo $data; - //echo date("Y-m-d H:i:s", time()) . " --- " . $url . "\n"; - if (!empty($data)) - { - file_put_contents("./html2/".md5($url).".html", $data); - } - // usleep模拟现实中比较负责的数据处理逻辑(如提取, 分词, 写入文件或数据库等) - //usleep(1); - //return compact('data', 'matches'); -} - diff --git a/vendor/owner888/phpspider/library/cls_query.php b/vendor/owner888/phpspider/library/cls_query.php deleted file mode 100644 index 65dd9fe..0000000 --- a/vendor/owner888/phpspider/library/cls_query.php +++ /dev/null @@ -1,248 +0,0 @@ - - * @created time :2015-08-08 15:52 - */ - private static function get_nodes($query) - { - // 把一到多个空格 替换成 一个空格 - // 把 > 和 ~ 符号两边的空格去掉,因为没有用这两个符号,所以这里可以不这么做 - // ul>li.className - $query = trim( - preg_replace('@\s+@', ' ', - preg_replace('@\s*(>|\\+|~)\s*@', '\\1', $query) - ) - ); - - $nodes = array(); - if (! $query) - { - return $nodes; - } - - $query_arr = explode(" ", $query); - foreach ($query_arr as $k=>$v) - { - $path = $k == 0 ? $v : $path.' '.$v; - $node = array("path"=>(string)$path, "name"=>"", "id"=>"", "class"=>"", "other"=>array()); - // 如果存在内容选择器 - if (preg_match('@(.*?)\[(.*?)=[\'|"](.*?)[\'|"]\]@', $v, $matches) && !empty($matches[2]) && !empty($matches[3])) - { - // 把选择器过滤掉 [rel='topic'] - $v = $matches[1]; - $node['other'] = array( - 'key'=>$matches[2], - 'val'=>$matches[3], - ); - } - - // 如果存在 id - $id_arr = explode("#", $v); - $class_arr = explode(".", $v); - if (count($id_arr) === 2) - { - $node['name'] = $id_arr[0]; - $node['id'] = $id_arr[1]; - } - // 如果存在 class - elseif (count($class_arr) === 2) - { - $node['name'] = $class_arr[0]; - $node['class'] = $class_arr[1]; - } - // 如果没有样式 - else - { - $node['name'] = $v; - } - $nodes[] = $node; - } - //print_r($nodes); - //exit; - return $nodes; - } - - public static function get_datas($nodes, $attr = "html") - { - if (empty(self::$content)) - { - return false; - } - - $node_datas = array(); - $count = count($nodes); - // 循环所有节点 - foreach ($nodes as $i=>$node) - { - $is_last = $count == $i+1 ? true : false; - // 第一次 - if ($i == 0) - { - $datas = array(); - $datas = self::get_node_datas($node, self::$content, $attr, $is_last); - // 如果第一次都取不到数据,直接跳出循环 - if(!$datas) - { - break; - } - $node_datas[$nodes[$i]['path']] = $datas; - } - else - { - $datas = array(); - // 循环上一个节点的数组 - foreach ($node_datas[$nodes[$i-1]['path']] as $v) - { - $datas = array_merge( $datas, self::get_node_datas($node, trim($v), $attr, $is_last) ); - } - $node_datas[$nodes[$i]['path']] = $datas; - // 删除上一个节点,防止内存溢出,或者缓存到本地,再次使用?! - unset($node_datas[$nodes[$i-1]['path']]); - } - } - //print_r($datas);exit; - // 从数组中弹出最后一个元素 - $node_datas = array_pop($node_datas); - //print_r($node_datas); - //exit; - return $node_datas; - } - - /** - * 从节点中获取内容 - * $regex = '@]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i'; - * - * @param mixed $node - * @param mixed $content - * @return void - * @author seatle - * @created time :2015-08-08 15:52 - */ - private static function get_node_datas($node, $content, $attr = "html", $is_last = false) - { - $node_datas = $datas = array(); - - if (!empty($node['id'])) - { - if ($node['name']) - $regex = '@<'.$node['name'].'[^>]+id\\s*=\\s*["|\']+?'.$node['id'].'\\s*[^>]+?>(.*?)@is'; - else - $regex = '@id\\s*=\\s*["|\']+?'.$node['id'].'\\s*[^>]+?>(.*?)<@is'; - } - elseif (!empty($node['class'])) - { - if ($node['name']) - $regex = '@<'.$node['name'].'[^>]+class\\s*=\\s*["|\']+?'.$node['class'].'\\s*[^>]+?>(.*?)@is'; - else - $regex = '@class\\s*=\\s*["|\']+?'.$node['class'].'\\s*[^>]+?>(.*?)<@is'; - } - else - { - // 这里为是么是*,0次到多次,因为有可能是
  • - $regex = '@<'.$node['name'].'[^>]*?>(.*?)@is'; - } - self::log("regex --- " . $regex);; - preg_match_all($regex, $content, $matches); - $all_datas = empty($matches[0]) ? array() : $matches[0]; - $html_datas = empty($matches[1]) ? array() : $matches[1]; - - // 过滤掉选择器对不上的 - foreach ($all_datas as $i=>$data) - { - // 如果有设置其他选择器,验证一下选择器 - if (!empty($node['other'])) - { - $regex = '@'.$node['other']['key'].'=[\'|"]'.$node['other']['val'].'[\'|"]@is'; - self::log("regex other --- " . $regex); - // 过滤器对不上的,跳过 - if (!preg_match($regex, $data, $matches)) - { - continue; - } - } - // 获取节点的html内容 - if ($attr != "html" && $is_last) - { - $regex = '@'.$attr.'=[\'|"](.*?)[\'|"]@is'; - preg_match($regex, $data, $matches); - $node_datas[] = empty($matches[1]) ? '' : trim($matches[1]); - } - // 获取节点属性名的值 - else - { - $node_datas[] = trim($html_datas[$i]); - } - } - //echo " 11111 ========================================= \n"; - //print_r($node_datas); - //echo " 22222 ========================================= \n\n\n"; - return $node_datas; - } - - /** - * 记录日志 - * @param string $msg - * @return void - */ - private static function log($msg) - { - $msg = "[".date("Y-m-d H:i:s")."] " . $msg . "\n"; - if (self::$debug) - { - echo $msg; - } - } - -} - -//$xpath = "ul.top-nav-dropdown li"; -//$xpath = "i.zg-icon"; -//print_r($nodes); -//exit; -// [^>]+ 不是>的字符重复一次到多次, ? 表示不贪婪 -// \s 表示空白字符 -// * 表示0次或者多次 -// + 表示1次或者多次 -// -// 后向引用,表示表达式中,从左往右数,第一个左括号对应的括号内的内容。 -// \\0 表示整个表达式 -// \\1表示第1个表达式 -// \\2表示第2个表达式 -// $regex = '@]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i'; -//preg_match_all($regex, $content, $matches); -//print_r($matches); -//exit; - -// 用法 -//$content = file_get_contents("./test.html"); -//$query = "ul#top-nav-profile-dropdown li a"; -//$query = "div#zh-profile-following-topic a.link[href='/topic/19550937']"; -//cls_query::init($content); -//$list = cls_query::query($query, "href"); -//print_r($list); - diff --git a/vendor/owner888/phpspider/library/cls_redis.php b/vendor/owner888/phpspider/library/cls_redis.php deleted file mode 100644 index 88dc0b8..0000000 --- a/vendor/owner888/phpspider/library/cls_redis.php +++ /dev/null @@ -1,1263 +0,0 @@ - -// +---------------------------------------------------------------------- - -//---------------------------------- -// PHPSpider Redis操作类文件 -//---------------------------------- - -class cls_redis -{ - /** - * redis链接标识符号 - */ - protected static $redis = NULL; - - /** - * redis配置数组 - */ - protected static $configs = array(); - private static $links = array(); - private static $link_name = 'default'; - - /** - * 默认redis前缀 - */ - public static $prefix = "phpspider"; - - public static $error = ""; - - public static function init() - { - if (!extension_loaded("redis")) - { - self::$error = "The redis extension was not found"; - return false; - } - - // 获取配置 - $config = self::$link_name == 'default' ? self::_get_default_config() : self::$configs[self::$link_name]; - - // 如果当前链接标识符为空,或者ping不同,就close之后重新打开 - //if ( empty(self::$links[self::$link_name]) || !self::ping() ) - if (empty(self::$links[self::$link_name])) - { - self::$links[self::$link_name] = new Redis(); - if (!self::$links[self::$link_name]->connect($config['host'], $config['port'], $config['timeout'])) - { - self::$error = "Unable to connect to redis server\nPlease check the configuration file config/inc_config.php"; - unset(self::$links[self::$link_name]); - return false; - } - - // 验证 - if ($config['pass']) - { - if ( !self::$links[self::$link_name]->auth($config['pass']) ) - { - self::$error = "Redis Server authentication failed\nPlease check the configuration file config/inc_config.php"; - unset(self::$links[self::$link_name]); - return false; - } - } - - $prefix = empty($config['prefix']) ? self::$prefix : $config['prefix']; - self::$links[self::$link_name]->setOption(Redis::OPT_PREFIX, $prefix . ":"); - self::$links[self::$link_name]->setOption(Redis::OPT_READ_TIMEOUT, -1); - self::$links[self::$link_name]->select($config['db']); - } - - return self::$links[self::$link_name]; - } - - public static function clear_link() - { - if(self::$links) - { - foreach(self::$links as $k=>$v) - { - $v->close(); - unset(self::$links[$k]); - } - } - } - - public static function set_connect($link_name, $config = array()) - { - self::$link_name = $link_name; - if (!empty($config)) - { - self::$configs[self::$link_name] = $config; - } - else - { - if (empty(self::$configs[self::$link_name])) - { - throw new Exception("You not set a config array for connect!"); - } - } - //print_r(self::$configs); - - //// 先断开原来的连接 - //if ( !empty(self::$links[self::$link_name]) ) - //{ - //self::$links[self::$link_name]->close(); - //self::$links[self::$link_name] = null; - //} - } - - public static function set_connect_default() - { - $config = self::_get_default_config(); - self::set_connect('default', $config); - } - - /** - * 获取默认配置 - */ - protected static function _get_default_config() - { - if (empty(self::$configs['default'])) - { - if (!is_array($GLOBALS['config']['redis'])) - { - exit('cls_redis.php _get_default_config()' . '没有redis配置'); - // You not set a config array for connect\nPlease check the configuration file config/inc_config.php - } - self::$configs['default'] = $GLOBALS['config']['redis']; - } - return self::$configs['default']; - } - - /** - * set - * - * @param mixed $key 键 - * @param mixed $value 值 - * @param int $expire 过期时间,单位:秒 - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function set($key, $value, $expire = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if ($expire > 0) - { - return self::$links[self::$link_name]->setex($key, $expire, $value); - } - else - { - return self::$links[self::$link_name]->set($key, $value); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::set($key, $value, $expire); - } - } - return NULL; - } - - - /** - * set - * - * @param mixed $key 键 - * @param mixed $value 值 - * @param int $expire 过期时间,单位:秒 - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function setnx($key, $value, $expire = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if ($expire > 0) - { - return self::$links[self::$link_name]->set($key, $value, array('nx', 'ex' => $expire)); - //self::$links[self::$link_name]->multi(); - //self::$links[self::$link_name]->setNX($key, $value); - //self::$links[self::$link_name]->expire($key, $expire); - //self::$links[self::$link_name]->exec(); - //return true; - } - else - { - return self::$links[self::$link_name]->setnx($key, $value); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::setnx($key, $value, $expire); - } - } - return NULL; - } - - /** - * 锁 - * 默认锁1秒 - * - * @param mixed $name 锁的标识名 - * @param mixed $value 锁的值,貌似没啥意义 - * @param int $expire 当前锁的最大生存时间(秒),必须大于0,超过生存时间系统会自动强制释放锁 - * @param int $interval 获取锁失败后挂起再试的时间间隔(微秒) - * @return void - * @author seatle - * @created time :2016-10-30 23:56 - */ - public static function lock($name, $value = 1, $expire = 5, $interval = 100000) - { - if ($name == null) return false; - - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - $key = "Lock:{$name}"; - while (true) - { - // 因为 setnx 没有 expire 设置,所以还是用set - //$result = self::$links[self::$link_name]->setnx($key, $value); - $result = self::$links[self::$link_name]->set($key, $value, array('nx', 'ex' => $expire)); - if ($result != false) - { - return true; - } - - usleep($interval); - } - return false; - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - // 睡眠100毫秒 - usleep(100000); - return self::lock($name, $value, $expire, $interval); - } - } - return false; - } - - public static function unlock($name) - { - $key = "Lock:{$name}"; - return self::del($key); - } - - /** - * get - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function get($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->get($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::get($key); - } - } - return NULL; - } - - /** - * del 删除数据 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function del($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->del($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::del($key); - } - } - return NULL; - } - - /** - * type 返回值的类型 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function type($key) - { - self::init(); - - $types = array( - '0' => 'set', - '1' => 'string', - '3' => 'list', - ); - - try - { - if ( self::$links[self::$link_name] ) - { - $type = self::$links[self::$link_name]->type($key); - if (isset($types[$type])) - { - return $types[$type]; - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::type($key); - } - } - return NULL; - } - - /** - * incr 名称为key的string增加integer, integer为0则增1 - * - * @param mixed $key - * @param int $integer - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function incr($key, $integer = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (empty($integer)) - { - return self::$links[self::$link_name]->incr($key); - } - else - { - return self::$links[self::$link_name]->incrby($key, $integer); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::incr($key, $integer); - } - } - return NULL; - } - - /** - * decr 名称为key的string减少integer, integer为0则减1 - * - * @param mixed $key - * @param int $integer - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function decr($key, $integer = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (empty($integer)) - { - return self::$links[self::$link_name]->decr($key); - } - else - { - return self::$links[self::$link_name]->decrby($key, $integer); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::decr($key, $integer); - } - } - return NULL; - } - - /** - * append 名称为key的string的值附加value - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function append($key, $value) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->append($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::append($key, $value); - } - } - return NULL; - } - - /** - * substr 返回名称为key的string的value的子串 - * - * @param mixed $key - * @param mixed $start - * @param mixed $end - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function substr($key, $start, $end) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->substr($key, $start, $end); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::substr($key, $start, $end); - } - } - return NULL; - } - - /** - * select 按索引查询 - * - * @param mixed $index - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function select($index) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->select($index); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::select($index); - } - } - return NULL; - } - - /** - * dbsize 返回当前数据库中key的数目 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function dbsize() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->dbsize(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::dbsize(); - } - } - return NULL; - } - - /** - * flushdb 删除当前选择数据库中的所有key - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function flushdb() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->flushdb(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::flushdb(); - } - } - return NULL; - } - - /** - * flushall 删除所有数据库中的所有key - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function flushall() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->flushall(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::flushall(); - } - } - return NULL; - } - - /** - * save 将数据保存到磁盘 - * - * @param mixed $is_bgsave 将数据异步保存到磁盘 - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function save($is_bgsave = false) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (!$is_bgsave) - { - return self::$links[self::$link_name]->save(); - } - else - { - return self::$links[self::$link_name]->bgsave(); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::save($is_bgsave); - } - } - return NULL; - } - - /** - * info 提供服务器的信息和统计 - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function info() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->info(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::info(); - } - } - return NULL; - } - - /** - * slowlog 慢查询日志 - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function slowlog($command = 'get', $len = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - if (!empty($len)) - { - return $redis->slowlog($command, $len); - } - else - { - return $redis->slowlog($command); - } - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::slowlog($command, $len); - } - } - return NULL; - } - - /** - * lastsave 返回上次成功将数据保存到磁盘的Unix时戳 - * - * @return void - * @author seatle - * @created time :2015-12-18 11:28 - */ - public static function lastsave() - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lastsave(); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lastsave(); - } - } - return NULL; - } - - /** - * lpush 将数据从左边压入 - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lpush($key, $value) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lpush($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lpush($key, $value); - } - } - return NULL; - } - - /** - * rpush 将数据从右边压入 - * - * @param mixed $key - * @param mixed $value - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function rpush($key, $value) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->rpush($key, $value); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::rpush($key, $value); - } - } - return NULL; - } - - /** - * lpop 从左边弹出数据, 并删除数据 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lpop($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lpop($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lpop($key); - } - } - return NULL; - } - - /** - * rpop 从右边弹出数据, 并删除数据 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function rpop($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->rpop($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::rpop($key); - } - } - return NULL; - } - - /** - * lsize 队列长度,同llen - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lsize($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lSize($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lsize($key); - } - } - return NULL; - } - - /** - * lget 获取数据 - * - * @param mixed $key - * @param int $index - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lget($key, $index = 0) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lget($key, $index); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lget($key, $index); - } - } - return NULL; - } - - /** - * lRange 获取范围数据 - * - * @param mixed $key - * @param mixed $start - * @param mixed $end - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function lrange($key, $start, $end) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->lRange($key, $start, $end); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::lrange($key, $start, $end); - } - } - return NULL; - } - - /** - * rlist 从右边弹出 $length 长度数据,并删除数据 - * - * @param mixed $key - * @param mixed $length - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function rlist($key, $length) - { - $queue_length = self::lsize($key); - // 如果队列中有数据 - if ($queue_length > 0) - { - $list = array(); - $count = ($queue_length >= $length) ? $length : $queue_length; - for ($i = 0; $i < $count; $i++) - { - $data = self::rpop($key); - if ($data === false) - { - continue; - } - - $list[] = $data; - } - return $list; - } - else - { - // 没有数据返回NULL - return NULL; - } - } - - /** - * keys - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - * 查找符合给定模式的key。 - * KEYS *命中数据库中所有key。 - * KEYS h?llo命中hello, hallo and hxllo等。 - * KEYS h*llo命中hllo和heeeeello等。 - * KEYS h[ae]llo命中hello和hallo,但不命中hillo。 - * 特殊符号用"\"隔开 - * 因为这个类加了OPT_PREFIX前缀,所以并不能真的列出redis所有的key,需要的话,要把前缀去掉 - */ - public static function keys($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->keys($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::keys($key); - } - } - return NULL; - } - - /** - * ttl 返回某个KEY的过期时间 - * 正数:剩余多少秒 - * -1:永不超时 - * -2:key不存在 - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function ttl($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->ttl($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::ttl($key); - } - } - return NULL; - } - - /** - * expire 为某个key设置过期时间,同setTimeout - * - * @param mixed $key - * @param mixed $expire - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function expire($key, $expire) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->expire($key, $expire); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::expire($key, $expire); - } - } - return NULL; - } - - /** - * exists key值是否存在 - * - * @param mixed $key - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - public static function exists($key) - { - self::init(); - try - { - if ( self::$links[self::$link_name] ) - { - return self::$links[self::$link_name]->exists($key); - } - } - catch (Exception $e) - { - $msg = "PHP Fatal error: Uncaught exception 'RedisException' with message '".$e->getMessage()."'\n"; - log::warn($msg); - if ($e->getCode() == 0) - { - self::$links[self::$link_name]->close(); - self::$links[self::$link_name] = null; - usleep(100000); - return self::exists($key); - } - } - return false; - } - - /** - * ping 检查当前redis是否存在且是否可以连接上 - * - * @return void - * @author seatle - * @created time :2015-12-13 01:05 - */ - //protected static function ping() - //{ - //if ( empty (self::$links[self::$link_name]) ) - //{ - //return false; - //} - //return self::$links[self::$link_name]->ping() == '+PONG'; - //} - - public static function encode($value) - { - return json_encode($value, JSON_UNESCAPED_UNICODE); - } - - public static function decode($value) - { - return json_decode($value, true); - } -} - - diff --git a/vendor/owner888/phpspider/library/cls_redis_client.php b/vendor/owner888/phpspider/library/cls_redis_client.php deleted file mode 100644 index 890d928..0000000 --- a/vendor/owner888/phpspider/library/cls_redis_client.php +++ /dev/null @@ -1,121 +0,0 @@ - - * @created time :2018-01-03 - */ -class cls_redis_client -{ - private $redis_socket = false; - //private $command = ''; - - public function __construct($host='127.0.0.1', $port=6379, $timeout = 3) - { - $this->redis_socket = stream_socket_client("tcp://".$host.":".$port, $errno, $errstr, $timeout); - if ( !$this->redis_socket ) - { - throw new Exception("{$errno} - {$errstr}"); - } - } - - public function __destruct() - { - fclose($this->redis_socket); - } - - public function __call($name, $args) - { - $crlf = "\r\n"; - array_unshift($args, $name); - $command = '*' . count($args) . $crlf; - foreach ($args as $arg) - { - $command .= '$' . strlen($arg) . $crlf . $arg . $crlf; - } - //echo $command."\n"; - $fwrite = fwrite($this->redis_socket, $command); - if ($fwrite === FALSE || $fwrite <= 0) - { - throw new Exception('Failed to write entire command to stream'); - } - return $this->read_response(); - } - - private function read_response() - { - $reply = trim(fgets($this->redis_socket, 1024)); - switch (substr($reply, 0, 1)) - { - case '-': - throw new Exception(trim(substr($reply, 1))); - break; - case '+': - $response = substr(trim($reply), 1); - if ($response === 'OK') - { - $response = TRUE; - } - break; - case '$': - $response = NULL; - if ($reply == '$-1') - { - break; - } - $read = 0; - $size = intval(substr($reply, 1)); - if ($size > 0) - { - do - { - $block_size = ($size - $read) > 1024 ? 1024 : ($size - $read); - $r = fread($this->redis_socket, $block_size); - if ($r === FALSE) - { - throw new Exception('Failed to read response from stream'); - } - else - { - $read += strlen($r); - $response .= $r; - } - } - while ($read < $size); - } - fread($this->redis_socket, 2); /* discard crlf */ - break; - /* Multi-bulk reply */ - case '*': - $count = intval(substr($reply, 1)); - if ($count == '-1') - { - return NULL; - } - $response = array(); - for ($i = 0; $i < $count; $i++) - { - $response[] = $this->read_response(); - } - break; - /* Integer reply */ - case ':': - $response = intval(substr(trim($reply), 1)); - break; - default: - throw new RedisException("Unknown response: {$reply}"); - break; - } - return $response; - } -} - - -//$redis = new cls_redis_client(); -//var_dump($redis->auth("foobared")); -//var_dump($redis->set("name",'abc')); -//var_dump($redis->get("name")); - diff --git a/vendor/owner888/phpspider/library/cls_redis_server.php b/vendor/owner888/phpspider/library/cls_redis_server.php deleted file mode 100644 index a206450..0000000 --- a/vendor/owner888/phpspider/library/cls_redis_server.php +++ /dev/null @@ -1,179 +0,0 @@ - - * @created time :2018-01-03 - */ -class cls_redis_server -{ - private $socket = false; - private $process_num = 3; - public $redis_kv_data = array(); - public $onMessage = null; - - public function __construct($host="0.0.0.0", $port=6379) - { - $this->socket = stream_socket_server("tcp://".$host.":".$port,$errno, $errstr); - if (!$this->socket) die($errstr."--".$errno); - echo "listen $host $port \r\n"; - } - - private function parse_resp(&$conn) - { - // 读取一行,遇到 \r\n 为一行 - $line = fgets($conn); - if($line === '' || $line === false) - { - return null; - } - // 获取第一个字符作为类型 - $type = $line[0]; - // 去掉第一个字符,去掉结尾的 \r\n - $line = mb_substr($line, 1, -2); - switch ( $type ) - { - case "*": - // 得到长度 - $count = (int) $line; - $data = array(); - for ($i = 1; $i <= $count; $i++) - { - $data[] = $this->parse_resp($conn); - } - return $data; - case "$": - if ($line == '-1') - { - return null; - } - // 截取的长度要加上 \r\n 两个字符 - $length = $line + 2; - $data = ''; - while ($length > 0) - { - $block = fread($conn, $length); - if ($length !== strlen($block)) - { - throw new Exception('RECEIVING'); - } - $data .= $block; - $length -= mb_strlen($block); - } - return mb_substr($data, 0, -2); - } - return $line; - } - - private function start_worker_process() - { - $pid = pcntl_fork(); - switch ($pid) - { - case -1: - echo "fork error : {$i} \r\n"; - exit; - case 0: - while ( true ) - { - echo "PID ".posix_getpid()." waiting...\n"; - // 堵塞等待 - $conn = stream_socket_accept($this->socket, -1); - if ( !$conn ) - { - continue; - } - //"*3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nmyvalue\r\n" - while( true ) - { - $arr = $this->parse_resp($conn); - if ( is_array($arr) ) - { - if ($this->onMessage) - { - call_user_func($this->onMessage, $conn, $arr); - } - } - else if ( $arr ) - { - if ($this->onMessage) - { - call_user_func($this->onMessage, $conn, $arr); - } - } - else - { - fclose($conn); - break; - } - } - } - default: - $this->pids[$pid] = $pid; - break; - } - } - - public function run() - { - for($i = 1; $i <= $this->process_num; $i++) - { - $this->start_worker_process(); - } - - while( true ) - { - foreach ($this->pids as $i => $pid) - { - if($pid) - { - $res = pcntl_waitpid($pid, $status,WNOHANG); - - if ( $res == -1 || $res > 0 ) - { - $this->start_worker_process(); - unset($this->pids[$pid]); - } - } - } - sleep(1); - } - } - -} - -$server = new cls_redis_server(); -$server->onMessage = function($conn, $info) use($server) -{ - if ( is_array($info) ) - { - $command = strtoupper($info[0]); - if ( $command == "SET" ) - { - $key = $info[1]; - $val = $info[2]; - $server->redis_kv_data[$key] = $val; - fwrite($conn, "+OK\r\n"); - } - else if ( $command == "GET" ) - { - $key = $info[1]; - $val = isset($server->redis_kv_data[$key]) ? $server->redis_kv_data[$key] : ''; - fwrite($conn, "$".strlen($val)."\r\n".$val."\r\n"); - } - else - { - fwrite($conn,"+OK\r\n"); - } - } - else - { - fwrite($conn,"+OK\r\n"); - } -}; -$server->run(); diff --git a/vendor/owner888/phpspider/library/phpquery.php b/vendor/owner888/phpspider/library/phpquery.php deleted file mode 100644 index d4be6e4..0000000 --- a/vendor/owner888/phpspider/library/phpquery.php +++ /dev/null @@ -1,5727 +0,0 @@ - - * @license http://www.opensource.org/licenses/mit-license.php MIT License - * @package phpQuery - */ - -namespace phpspider\library; -use DOMDocument; -use DOMXpath; -use Exception; - -// class names for instanceof -// TODO move them as class constants into phpQuery -define('DOMDOCUMENT', 'DOMDocument'); -define('DOMELEMENT', 'DOMElement'); -define('DOMNODELIST', 'DOMNodeList'); -define('DOMNODE', 'DOMNode'); - -/** - * DOMEvent class. - * - * Based on - * @link http://developer.mozilla.org/En/DOM:event - * @author Tobiasz Cudnik - * @package phpQuery - * @todo implement ArrayAccess ? - */ -class DOMEvent { - - /** - * Returns a boolean indicating whether the event bubbles up through the DOM or not. - * - * @var unknown_type - */ - public $bubbles = true; - /** - * Returns a boolean indicating whether the event is cancelable. - * - * @var unknown_type - */ - public $cancelable = true; - /** - * Returns a reference to the currently registered target for the event. - * - * @var unknown_type - */ - public $currentTarget; - /** - * Returns detail about the event, depending on the type of event. - * - * @var unknown_type - * @link http://developer.mozilla.org/en/DOM/event.detail - */ - public $detail; // ??? - /** - * Used to indicate which phase of the event flow is currently being evaluated. - * - * NOT IMPLEMENTED - * - * @var unknown_type - * @link http://developer.mozilla.org/en/DOM/event.eventPhase - */ - public $eventPhase; // ??? - /** - * The explicit original target of the event (Mozilla-specific). - * - * NOT IMPLEMENTED - * - * @var unknown_type - */ - public $explicitOriginalTarget; // moz only - /** - * The original target of the event, before any retargetings (Mozilla-specific). - * - * NOT IMPLEMENTED - * - * @var unknown_type - */ - public $originalTarget; // moz only - /** - * Identifies a secondary target for the event. - * - * @var unknown_type - */ - public $relatedTarget; - /** - * Returns a reference to the target to which the event was originally dispatched. - * - * @var unknown_type - */ - public $target; - /** - * Returns the time that the event was created. - * - * @var unknown_type - */ - public $timeStamp; - /** - * Returns the name of the event (case-insensitive). - */ - public $type; - public $runDefault = true; - public $data = null; - public function __construct($data) { - foreach($data as $k => $v) { - $this->$k = $v; - } - if (! $this->timeStamp) - $this->timeStamp = time(); - } - /** - * Cancels the event (if it is cancelable). - * - */ - public function preventDefault() { - $this->runDefault = false; - } - /** - * Stops the propagation of events further along in the DOM. - * - */ - public function stopPropagation() { - $this->bubbles = false; - } -} - - -/** - * DOMDocumentWrapper class simplifies work with DOMDocument. - * - * Know bug: - * - in XHTML fragments,
    changes to
    - * - * @todo check XML catalogs compatibility - * @author Tobiasz Cudnik - * @package phpQuery - */ -class DOMDocumentWrapper { - /** - * @var DOMDocument - */ - public $document; - public $id; - /** - * @todo Rewrite as method and quess if null. - * @var unknown_type - */ - public $contentType = ''; - public $xpath; - public $uuid = 0; - public $data = array(); - public $dataNodes = array(); - public $events = array(); - public $eventsNodes = array(); - public $eventsGlobal = array(); - /** - * @TODO iframes support http://code.google.com/p/phpquery/issues/detail?id=28 - * @var unknown_type - */ - public $frames = array(); - /** - * Document root, by default equals to document itself. - * Used by documentFragments. - * - * @var DOMNode - */ - public $root; - public $isDocumentFragment; - public $isXML = false; - public $isXHTML = false; - public $isHTML = false; - public $charset; - public function __construct($markup = null, $contentType = null, $newDocumentID = null) { - if (isset($markup)) - $this->load($markup, $contentType, $newDocumentID); - $this->id = $newDocumentID - ? $newDocumentID - : md5(microtime()); - } - public function load($markup, $contentType = null, $newDocumentID = null) { -// phpQuery::$documents[$id] = $this; - $this->contentType = strtolower($contentType); - if ($markup instanceof DOMDOCUMENT) { - $this->document = $markup; - $this->root = $this->document; - $this->charset = $this->document->encoding; - // TODO isDocumentFragment - } else { - $loaded = $this->loadMarkup($markup); - } - if ($loaded) { -// $this->document->formatOutput = true; - $this->document->preserveWhiteSpace = true; - $this->xpath = new DOMXPath($this->document); - $this->afterMarkupLoad(); - return true; - // remember last loaded document -// return phpQuery::selectDocument($id); - } - return false; - } - protected function afterMarkupLoad() { - if ($this->isXHTML) { - $this->xpath->registerNamespace("html", "http://www.w3.org/1999/xhtml"); - } - } - protected function loadMarkup($markup) { - $loaded = false; - if ($this->contentType) { - self::debug("Load markup for content type {$this->contentType}"); - // content determined by contentType - list($contentType, $charset) = $this->contentTypeToArray($this->contentType); - switch($contentType) { - case 'text/html': - phpQuery::debug("Loading HTML, content type '{$this->contentType}'"); - $loaded = $this->loadMarkupHTML($markup, $charset); - break; - case 'text/xml': - case 'application/xhtml+xml': - phpQuery::debug("Loading XML, content type '{$this->contentType}'"); - $loaded = $this->loadMarkupXML($markup, $charset); - break; - default: - // for feeds or anything that sometimes doesn't use text/xml - if (strpos('xml', $this->contentType) !== false) { - phpQuery::debug("Loading XML, content type '{$this->contentType}'"); - $loaded = $this->loadMarkupXML($markup, $charset); - } else - phpQuery::debug("Could not determine document type from content type '{$this->contentType}'"); - } - } else { - // content type autodetection - if ($this->isXML($markup)) { - phpQuery::debug("Loading XML, isXML() == true"); - $loaded = $this->loadMarkupXML($markup); - if (! $loaded && $this->isXHTML) { - phpQuery::debug('Loading as XML failed, trying to load as HTML, isXHTML == true'); - $loaded = $this->loadMarkupHTML($markup); - } - } else { - phpQuery::debug("Loading HTML, isXML() == false"); - $loaded = $this->loadMarkupHTML($markup); - } - } - return $loaded; - } - protected function loadMarkupReset() { - $this->isXML = $this->isXHTML = $this->isHTML = false; - } - protected function documentCreate($charset, $version = '1.0') { - if (! $version) - $version = '1.0'; - $this->document = new DOMDocument($version, $charset); - $this->charset = $this->document->encoding; -// $this->document->encoding = $charset; - $this->document->formatOutput = true; - $this->document->preserveWhiteSpace = true; - } - protected function loadMarkupHTML($markup, $requestedCharset = null) { - if (phpQuery::$debug) - phpQuery::debug('Full markup load (HTML): '.substr($markup, 0, 250)); - $this->loadMarkupReset(); - $this->isHTML = true; - if (!isset($this->isDocumentFragment)) - $this->isDocumentFragment = self::isDocumentFragmentHTML($markup); - $charset = null; - $documentCharset = $this->charsetFromHTML($markup); - $addDocumentCharset = false; - if ($documentCharset) { - $charset = $documentCharset; - $markup = $this->charsetFixHTML($markup); - } else if ($requestedCharset) { - $charset = $requestedCharset; - } - if (! $charset) - $charset = phpQuery::$defaultCharset; - // HTTP 1.1 says that the default charset is ISO-8859-1 - // @see http://www.w3.org/International/O-HTTP-charset - if (! $documentCharset) { - $documentCharset = 'ISO-8859-1'; - $addDocumentCharset = true; - } - // Should be careful here, still need 'magic encoding detection' since lots of pages have other 'default encoding' - // Worse, some pages can have mixed encodings... we'll try not to worry about that - $requestedCharset = strtoupper($requestedCharset); - $documentCharset = strtoupper($documentCharset); - phpQuery::debug("DOC: $documentCharset REQ: $requestedCharset"); - if ($requestedCharset && $documentCharset && $requestedCharset !== $documentCharset) { - phpQuery::debug("CHARSET CONVERT"); - // Document Encoding Conversion - // http://code.google.com/p/phpquery/issues/detail?id=86 - if (function_exists('mb_detect_encoding')) { - $possibleCharsets = array($documentCharset, $requestedCharset, 'AUTO'); - $docEncoding = mb_detect_encoding($markup, implode(', ', $possibleCharsets)); - if (! $docEncoding) - $docEncoding = $documentCharset; // ok trust the document - phpQuery::debug("DETECTED '$docEncoding'"); - // Detected does not match what document says... - if ($docEncoding !== $documentCharset) { - // Tricky.. - } - if ($docEncoding !== $requestedCharset) { - phpQuery::debug("CONVERT $docEncoding => $requestedCharset"); - $markup = mb_convert_encoding($markup, $requestedCharset, $docEncoding); - $markup = $this->charsetAppendToHTML($markup, $requestedCharset); - $charset = $requestedCharset; - } - } else { - phpQuery::debug("TODO: charset conversion without mbstring..."); - } - } - $return = false; - if ($this->isDocumentFragment) { - phpQuery::debug("Full markup load (HTML), DocumentFragment detected, using charset '$charset'"); - $return = $this->documentFragmentLoadMarkup($this, $charset, $markup); - } else { - if ($addDocumentCharset) { - phpQuery::debug("Full markup load (HTML), appending charset: '$charset'"); - $markup = $this->charsetAppendToHTML($markup, $charset); - } - phpQuery::debug("Full markup load (HTML), documentCreate('$charset')"); - $this->documentCreate($charset); - $return = phpQuery::$debug === 2 - ? $this->document->loadHTML($markup) - : @$this->document->loadHTML($markup); - if ($return) - $this->root = $this->document; - } - if ($return && ! $this->contentType) - $this->contentType = 'text/html'; - return $return; - } - protected function loadMarkupXML($markup, $requestedCharset = null) { - if (phpQuery::$debug) - phpQuery::debug('Full markup load (XML): '.substr($markup, 0, 250)); - $this->loadMarkupReset(); - $this->isXML = true; - // check agains XHTML in contentType or markup - $isContentTypeXHTML = $this->isXHTML(); - $isMarkupXHTML = $this->isXHTML($markup); - if ($isContentTypeXHTML || $isMarkupXHTML) { - self::debug('Full markup load (XML), XHTML detected'); - $this->isXHTML = true; - } - // determine document fragment - if (! isset($this->isDocumentFragment)) - $this->isDocumentFragment = $this->isXHTML - ? self::isDocumentFragmentXHTML($markup) - : self::isDocumentFragmentXML($markup); - // this charset will be used - $charset = null; - // charset from XML declaration @var string - $documentCharset = $this->charsetFromXML($markup); - if (! $documentCharset) { - if ($this->isXHTML) { - // this is XHTML, try to get charset from content-type meta header - $documentCharset = $this->charsetFromHTML($markup); - if ($documentCharset) { - phpQuery::debug("Full markup load (XML), appending XHTML charset '$documentCharset'"); - $this->charsetAppendToXML($markup, $documentCharset); - $charset = $documentCharset; - } - } - if (! $documentCharset) { - // if still no document charset... - $charset = $requestedCharset; - } - } else if ($requestedCharset) { - $charset = $requestedCharset; - } - if (! $charset) { - $charset = phpQuery::$defaultCharset; - } - if ($requestedCharset && $documentCharset && $requestedCharset != $documentCharset) { - // TODO place for charset conversion -// $charset = $requestedCharset; - } - $return = false; - if ($this->isDocumentFragment) { - phpQuery::debug("Full markup load (XML), DocumentFragment detected, using charset '$charset'"); - $return = $this->documentFragmentLoadMarkup($this, $charset, $markup); - } else { - // FIXME ??? - if ($isContentTypeXHTML && ! $isMarkupXHTML) - if (! $documentCharset) { - phpQuery::debug("Full markup load (XML), appending charset '$charset'"); - $markup = $this->charsetAppendToXML($markup, $charset); - } - // see http://pl2.php.net/manual/en/book.dom.php#78929 - // LIBXML_DTDLOAD (>= PHP 5.1) - // does XML ctalogues works with LIBXML_NONET - // $this->document->resolveExternals = true; - // TODO test LIBXML_COMPACT for performance improvement - // create document - $this->documentCreate($charset); - if (phpversion() < 5.1) { - $this->document->resolveExternals = true; - $return = phpQuery::$debug === 2 - ? $this->document->loadXML($markup) - : @$this->document->loadXML($markup); - } else { - /** @link http://pl2.php.net/manual/en/libxml.constants.php */ - $libxmlStatic = phpQuery::$debug === 2 - ? LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET - : LIBXML_DTDLOAD|LIBXML_DTDATTR|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOERROR; - $return = $this->document->loadXML($markup, $libxmlStatic); -// if (! $return) -// $return = $this->document->loadHTML($markup); - } - if ($return) - $this->root = $this->document; - } - if ($return) { - if (! $this->contentType) { - if ($this->isXHTML) - $this->contentType = 'application/xhtml+xml'; - else - $this->contentType = 'text/xml'; - } - return $return; - } else { - throw new Exception("Error loading XML markup"); - } - } - protected function isXHTML($markup = null) { - if (! isset($markup)) { - return strpos($this->contentType, 'xhtml') !== false; - } - // XXX ok ? - return strpos($markup, "doctype) && is_object($dom->doctype) -// ? $dom->doctype->publicId -// : self::$defaultDoctype; - } - protected function isXML($markup) { -// return strpos($markup, ']+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i', - $markup, $matches - ); - if (! isset($matches[0])) - return array(null, null); - // get attr 'content' - preg_match('@content\\s*=\\s*(["|\'])(.+?)\\1@', $matches[0], $matches); - if (! isset($matches[0])) - return array(null, null); - return $this->contentTypeToArray($matches[2]); - } - protected function charsetFromHTML($markup) { - $contentType = $this->contentTypeFromHTML($markup); - return $contentType[1]; - } - protected function charsetFromXML($markup) { - $matches; - // find declaration - preg_match('@<'.'?xml[^>]+encoding\\s*=\\s*(["|\'])(.*?)\\1@i', - $markup, $matches - ); - return isset($matches[2]) - ? strtolower($matches[2]) - : null; - } - /** - * Repositions meta[type=charset] at the start of head. Bypasses DOMDocument bug. - * - * @link http://code.google.com/p/phpquery/issues/detail?id=80 - * @param $html - */ - protected function charsetFixHTML($markup) { - $matches = array(); - // find meta tag - preg_match('@\s*]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i', - $markup, $matches, PREG_OFFSET_CAPTURE - ); - if (! isset($matches[0])) - return; - $metaContentType = $matches[0][0]; - $markup = substr($markup, 0, $matches[0][1]) - .substr($markup, $matches[0][1]+strlen($metaContentType)); - $headStart = stripos($markup, ''); - $markup = substr($markup, 0, $headStart+6).$metaContentType - .substr($markup, $headStart+6); - return $markup; - } - protected function charsetAppendToHTML($html, $charset, $xhtml = false) { - // remove existing meta[type=content-type] - $html = preg_replace('@\s*]+http-equiv\\s*=\\s*(["|\'])Content-Type\\1([^>]+?)>@i', '', $html); - $meta = ''; - if (strpos($html, ')@s', - "{$meta}", - $html - ); - } - } else { - return preg_replace( - '@)@s', - ''.$meta, - $html - ); - } - } - protected function charsetAppendToXML($markup, $charset) { - $declaration = '<'.'?xml version="1.0" encoding="'.$charset.'"?'.'>'; - return $declaration.$markup; - } - public static function isDocumentFragmentHTML($markup) { - return stripos($markup, 'documentFragmentCreate($node, $sourceCharset); -// if ($fake === false) -// throw new Exception("Error loading documentFragment markup"); -// else -// $return = array_merge($return, -// $this->import($fake->root->childNodes) -// ); -// } else { -// $return[] = $this->document->importNode($node, true); -// } -// } -// return $return; -// } else { -// // string markup -// $fake = $this->documentFragmentCreate($source, $sourceCharset); -// if ($fake === false) -// throw new Exception("Error loading documentFragment markup"); -// else -// return $this->import($fake->root->childNodes); -// } - if (is_array($source) || $source instanceof DOMNODELIST) { - // dom nodes - self::debug('Importing nodes to document'); - foreach($source as $node) - $return[] = $this->document->importNode($node, true); - } else { - // string markup - $fake = $this->documentFragmentCreate($source, $sourceCharset); - if ($fake === false) - throw new Exception("Error loading documentFragment markup"); - else - return $this->import($fake->root->childNodes); - } - return $return; - } - /** - * Creates new document fragment. - * - * @param $source - * @return DOMDocumentWrapper - */ - protected function documentFragmentCreate($source, $charset = null) { - $fake = new DOMDocumentWrapper(); - $fake->contentType = $this->contentType; - $fake->isXML = $this->isXML; - $fake->isHTML = $this->isHTML; - $fake->isXHTML = $this->isXHTML; - $fake->root = $fake->document; - if (! $charset) - $charset = $this->charset; -// $fake->documentCreate($this->charset); - if ($source instanceof DOMNODE && !($source instanceof DOMNODELIST)) - $source = array($source); - if (is_array($source) || $source instanceof DOMNODELIST) { - // dom nodes - // load fake document - if (! $this->documentFragmentLoadMarkup($fake, $charset)) - return false; - $nodes = $fake->import($source); - foreach($nodes as $node) - $fake->root->appendChild($node); - } else { - // string markup - $this->documentFragmentLoadMarkup($fake, $charset, $source); - } - return $fake; - } - /** - * - * @param $document DOMDocumentWrapper - * @param $markup - * @return $document - */ - private function documentFragmentLoadMarkup($fragment, $charset, $markup = null) { - // TODO error handling - // TODO copy doctype - // tempolary turn off - $fragment->isDocumentFragment = false; - if ($fragment->isXML) { - if ($fragment->isXHTML) { - // add FAKE element to set default namespace - $fragment->loadMarkupXML('' - .'' - .''.$markup.''); - $fragment->root = $fragment->document->firstChild->nextSibling; - } else { - $fragment->loadMarkupXML(''.$markup.''); - $fragment->root = $fragment->document->firstChild; - } - } else { - $markup2 = phpQuery::$defaultDoctype.''; - $noBody = strpos($markup, 'loadMarkupHTML($markup2); - // TODO resolv body tag merging issue - $fragment->root = $noBody - ? $fragment->document->firstChild->nextSibling->firstChild->nextSibling - : $fragment->document->firstChild->nextSibling->firstChild->nextSibling; - } - if (! $fragment->root) - return false; - $fragment->isDocumentFragment = true; - return true; - } - protected function documentFragmentToMarkup($fragment) { - phpQuery::debug('documentFragmentToMarkup'); - $tmp = $fragment->isDocumentFragment; - $fragment->isDocumentFragment = false; - $markup = $fragment->markup(); - if ($fragment->isXML) { - $markup = substr($markup, 0, strrpos($markup, '')); - if ($fragment->isXHTML) { - $markup = substr($markup, strpos($markup, '')+6); - } - } else { - $markup = substr($markup, strpos($markup, '')+6); - $markup = substr($markup, 0, strrpos($markup, '')); - } - $fragment->isDocumentFragment = $tmp; - if (phpQuery::$debug) - phpQuery::debug('documentFragmentToMarkup: '.substr($markup, 0, 150)); - return $markup; - } - /** - * Return document markup, starting with optional $nodes as root. - * - * @param $nodes DOMNode|DOMNodeList - * @return string - */ - public function markup($nodes = null, $innerMarkup = false) { - if (isset($nodes) && count($nodes) == 1 && $nodes[0] instanceof DOMDOCUMENT) - $nodes = null; - if (isset($nodes)) { - $markup = ''; - if (!is_array($nodes) && !($nodes instanceof DOMNODELIST) ) - $nodes = array($nodes); - if ($this->isDocumentFragment && ! $innerMarkup) - foreach($nodes as $i => $node) - if ($node->isSameNode($this->root)) { - // var_dump($node); - $nodes = array_slice($nodes, 0, $i) - + phpQuery::DOMNodeListToArray($node->childNodes) - + array_slice($nodes, $i+1); - } - if ($this->isXML && ! $innerMarkup) { - self::debug("Getting outerXML with charset '{$this->charset}'"); - // we need outerXML, so we can benefit from - // $node param support in saveXML() - foreach($nodes as $node) - $markup .= $this->document->saveXML($node); - } else { - $loop = array(); - if ($innerMarkup) - foreach($nodes as $node) { - if ($node->childNodes) - foreach($node->childNodes as $child) - $loop[] = $child; - else - $loop[] = $node; - } - else - $loop = $nodes; - self::debug("Getting markup, moving selected nodes (".count($loop).") to new DocumentFragment"); - $fake = $this->documentFragmentCreate($loop); - $markup = $this->documentFragmentToMarkup($fake); - } - if ($this->isXHTML) { - self::debug("Fixing XHTML"); - $markup = self::markupFixXHTML($markup); - } - self::debug("Markup: ".substr($markup, 0, 250)); - return $markup; - } else { - if ($this->isDocumentFragment) { - // documentFragment, html only... - self::debug("Getting markup, DocumentFragment detected"); -// return $this->markup( -//// $this->document->getElementsByTagName('body')->item(0) -// $this->document->root, true -// ); - $markup = $this->documentFragmentToMarkup($this); - // no need for markupFixXHTML, as it's done thought markup($nodes) method - return $markup; - } else { - self::debug("Getting markup (".($this->isXML?'XML':'HTML')."), final with charset '{$this->charset}'"); - $markup = $this->isXML - ? $this->document->saveXML() - : $this->document->saveHTML(); - if ($this->isXHTML) { - self::debug("Fixing XHTML"); - $markup = self::markupFixXHTML($markup); - } - self::debug("Markup: ".substr($markup, 0, 250)); - return $markup; - } - } - } - protected static function markupFixXHTML($markup) { - $markup = self::expandEmptyTag('script', $markup); - $markup = self::expandEmptyTag('select', $markup); - $markup = self::expandEmptyTag('textarea', $markup); - return $markup; - } - public static function debug($text) { - phpQuery::debug($text); - } - /** - * expandEmptyTag - * - * @param $tag - * @param $xml - * @return unknown_type - * @author mjaque at ilkebenson dot com - * @link http://php.net/manual/en/domdocument.savehtml.php#81256 - */ - public static function expandEmptyTag($tag, $xml){ - $indice = 0; - while ($indice< strlen($xml)){ - $pos = strpos($xml, "<$tag ", $indice); - if ($pos){ - $posCierre = strpos($xml, ">", $pos); - if ($xml[$posCierre-1] == "/"){ - $xml = substr_replace($xml, ">", $posCierre-1, 2); - } - $indice = $posCierre; - } - else break; - } - return $xml; - } -} - -/** - * Event handling class. - * - * @author Tobiasz Cudnik - * @package phpQuery - * @static - */ -abstract class phpQueryEvents { - /** - * Trigger a type of event on every matched element. - * - * @param DOMNode|phpQueryObject|string $document - * @param unknown_type $type - * @param unknown_type $data - * - * @TODO exclusive events (with !) - * @TODO global events (test) - * @TODO support more than event in $type (space-separated) - */ - public static function trigger($document, $type, $data = array(), $node = null) { - // trigger: function(type, data, elem, donative, extra) { - $documentID = phpQuery::getDocumentID($document); - $namespace = null; - if (strpos($type, '.') !== false) - list($name, $namespace) = explode('.', $type); - else - $name = $type; - if (! $node) { - if (self::issetGlobal($documentID, $type)) { - $pq = phpQuery::getDocument($documentID); - // TODO check add($pq->document) - $pq->find('*')->add($pq->document) - ->trigger($type, $data); - } - } else { - if (isset($data[0]) && $data[0] instanceof DOMEvent) { - $event = $data[0]; - $event->relatedTarget = $event->target; - $event->target = $node; - $data = array_slice($data, 1); - } else { - $event = new DOMEvent(array( - 'type' => $type, - 'target' => $node, - 'timeStamp' => time(), - )); - } - $i = 0; - while($node) { - // TODO whois - phpQuery::debug("Triggering ".($i?"bubbled ":'')."event '{$type}' on " - ."node \n");//.phpQueryObject::whois($node)."\n"); - $event->currentTarget = $node; - $eventNode = self::getNode($documentID, $node); - if (isset($eventNode->eventHandlers)) { - foreach($eventNode->eventHandlers as $eventType => $handlers) { - $eventNamespace = null; - if (strpos($type, '.') !== false) - list($eventName, $eventNamespace) = explode('.', $eventType); - else - $eventName = $eventType; - if ($name != $eventName) - continue; - if ($namespace && $eventNamespace && $namespace != $eventNamespace) - continue; - foreach($handlers as $handler) { - phpQuery::debug("Calling event handler\n"); - $event->data = $handler['data'] - ? $handler['data'] - : null; - $params = array_merge(array($event), $data); - $return = phpQuery::callbackRun($handler['callback'], $params); - if ($return === false) { - $event->bubbles = false; - } - } - } - } - // to bubble or not to bubble... - if (! $event->bubbles) - break; - $node = $node->parentNode; - $i++; - } - } - } - /** - * Binds a handler to one or more events (like click) for each matched element. - * Can also bind custom events. - * - * @param DOMNode|phpQueryObject|string $document - * @param unknown_type $type - * @param unknown_type $data Optional - * @param unknown_type $callback - * - * @TODO support '!' (exclusive) events - * @TODO support more than event in $type (space-separated) - * @TODO support binding to global events - */ - public static function add($document, $node, $type, $data, $callback = null) { - phpQuery::debug("Binding '$type' event"); - $documentID = phpQuery::getDocumentID($document); -// if (is_null($callback) && is_callable($data)) { -// $callback = $data; -// $data = null; -// } - $eventNode = self::getNode($documentID, $node); - if (! $eventNode) - $eventNode = self::setNode($documentID, $node); - if (!isset($eventNode->eventHandlers[$type])) - $eventNode->eventHandlers[$type] = array(); - $eventNode->eventHandlers[$type][] = array( - 'callback' => $callback, - 'data' => $data, - ); - } - /** - * Enter description here... - * - * @param DOMNode|phpQueryObject|string $document - * @param unknown_type $type - * @param unknown_type $callback - * - * @TODO namespace events - * @TODO support more than event in $type (space-separated) - */ - public static function remove($document, $node, $type = null, $callback = null) { - $documentID = phpQuery::getDocumentID($document); - $eventNode = self::getNode($documentID, $node); - if (is_object($eventNode) && isset($eventNode->eventHandlers[$type])) { - if ($callback) { - foreach($eventNode->eventHandlers[$type] as $k => $handler) - if ($handler['callback'] == $callback) - unset($eventNode->eventHandlers[$type][$k]); - } else { - unset($eventNode->eventHandlers[$type]); - } - } - } - protected static function getNode($documentID, $node) { - foreach(phpQuery::$documents[$documentID]->eventsNodes as $eventNode) { - if ($node->isSameNode($eventNode)) - return $eventNode; - } - } - protected static function setNode($documentID, $node) { - phpQuery::$documents[$documentID]->eventsNodes[] = $node; - return phpQuery::$documents[$documentID]->eventsNodes[ - count(phpQuery::$documents[$documentID]->eventsNodes)-1 - ]; - } - protected static function issetGlobal($documentID, $type) { - return isset(phpQuery::$documents[$documentID]) - ? in_array($type, phpQuery::$documents[$documentID]->eventsGlobal) - : false; - } -} - - -interface ICallbackNamed { - function hasName(); - function getName(); -} -/** - * Callback class introduces currying-like pattern. - * - * Example: - * function foo($param1, $param2, $param3) { - * var_dump($param1, $param2, $param3); - * } - * $fooCurried = new Callback('foo', - * 'param1 is now statically set', - * new CallbackParam, new CallbackParam - * ); - * phpQuery::callbackRun($fooCurried, - * array('param2 value', 'param3 value' - * ); - * - * Callback class is supported in all phpQuery methods which accepts callbacks. - * - * @link http://code.google.com/p/phpquery/wiki/Callbacks#Param_Structures - * @author Tobiasz Cudnik - * - * @TODO??? return fake forwarding function created via create_function - * @TODO honor paramStructure - */ -class Callback - implements ICallbackNamed { - public $callback = null; - public $params = null; - protected $name; - public function __construct($callback, $param1 = null, $param2 = null, - $param3 = null) { - $params = func_get_args(); - $params = array_slice($params, 1); - if ($callback instanceof Callback) { - // TODO implement recurention - } else { - $this->callback = $callback; - $this->params = $params; - } - } - public function getName() { - return 'Callback: '.$this->name; - } - public function hasName() { - return isset($this->name) && $this->name; - } - public function setName($name) { - $this->name = $name; - return $this; - } - // TODO test me -// public function addParams() { -// $params = func_get_args(); -// return new Callback($this->callback, $this->params+$params); -// } -} -/** - * Shorthand for new Callback(create_function(...), ...); - * - * @author Tobiasz Cudnik - */ -class CallbackBody extends Callback { - public function __construct($paramList, $code, $param1 = null, $param2 = null, - $param3 = null) { - $params = func_get_args(); - $params = array_slice($params, 2); - $this->callback = create_function($paramList, $code); - $this->params = $params; - } -} -/** - * Callback type which on execution returns reference passed during creation. - * - * @author Tobiasz Cudnik - */ -class CallbackReturnReference extends Callback - implements ICallbackNamed { - protected $reference; - public function __construct(&$reference, $name = null){ - $this->reference =& $reference; - $this->callback = array($this, 'callback'); - } - public function callback() { - return $this->reference; - } - public function getName() { - return 'Callback: '.$this->name; - } - public function hasName() { - return isset($this->name) && $this->name; - } -} -/** - * Callback type which on execution returns value passed during creation. - * - * @author Tobiasz Cudnik - */ -class CallbackReturnValue extends Callback - implements ICallbackNamed { - protected $value; - protected $name; - public function __construct($value, $name = null){ - $this->value =& $value; - $this->name = $name; - $this->callback = array($this, 'callback'); - } - public function callback() { - return $this->value; - } - public function __toString() { - return $this->getName(); - } - public function getName() { - return 'Callback: '.$this->name; - } - public function hasName() { - return isset($this->name) && $this->name; - } -} -/** - * CallbackParameterToReference can be used when we don't really want a callback, - * only parameter passed to it. CallbackParameterToReference takes first - * parameter's value and passes it to reference. - * - * @author Tobiasz Cudnik - */ -class CallbackParameterToReference extends Callback { - /** - * @param $reference - * @TODO implement $paramIndex; - * param index choose which callback param will be passed to reference - */ - public function __construct(&$reference){ - $this->callback =& $reference; - } -} -//class CallbackReference extends Callback { -// /** -// * -// * @param $reference -// * @param $paramIndex -// * @todo implement $paramIndex; param index choose which callback param will be passed to reference -// */ -// public function __construct(&$reference, $name = null){ -// $this->callback =& $reference; -// } -//} -class CallbackParam {} - -/** - * Class representing phpQuery objects. - * - * @author Tobiasz Cudnik - * @package phpQuery - * @method phpQueryObject clone() clone() - * @method phpQueryObject empty() empty() - * @method phpQueryObject next() next($selector = null) - * @method phpQueryObject prev() prev($selector = null) - * @property Int $length - */ -class phpQueryObject - implements \Iterator, \Countable, \ArrayAccess { - public $documentID = null; - /** - * DOMDocument class. - * - * @var DOMDocument - */ - public $document = null; - public $charset = null; - /** - * - * @var DOMDocumentWrapper - */ - public $documentWrapper = null; - /** - * XPath interface. - * - * @var DOMXPath - */ - public $xpath = null; - /** - * Stack of selected elements. - * @TODO refactor to ->nodes - * @var array - */ - public $elements = array(); - /** - * @access private - */ - protected $elementsBackup = array(); - /** - * @access private - */ - protected $previous = null; - /** - * @access private - * @TODO deprecate - */ - protected $root = array(); - /** - * Indicated if doument is just a fragment (no tag). - * - * Every document is realy a full document, so even documentFragments can - * be queried against , but getDocument(id)->htmlOuter() will return - * only contents of . - * - * @var bool - */ - public $documentFragment = true; - /** - * Iterator interface helper - * @access private - */ - protected $elementsInterator = array(); - /** - * Iterator interface helper - * @access private - */ - protected $valid = false; - /** - * Iterator interface helper - * @access private - */ - protected $current = null; - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function __construct($documentID) { -// if ($documentID instanceof self) -// var_dump($documentID->getDocumentID()); - $id = $documentID instanceof self - ? $documentID->getDocumentID() - : $documentID; -// var_dump($id); - if (! isset(phpQuery::$documents[$id] )) { -// var_dump(phpQuery::$documents); - throw new Exception("Document with ID '{$id}' isn't loaded. Use phpQuery::newDocument(\$html) or phpQuery::newDocumentFile(\$file) first."); - } - $this->documentID = $id; - $this->documentWrapper =& phpQuery::$documents[$id]; - $this->document =& $this->documentWrapper->document; - $this->xpath =& $this->documentWrapper->xpath; - $this->charset =& $this->documentWrapper->charset; - $this->documentFragment =& $this->documentWrapper->isDocumentFragment; - // TODO check $this->DOM->documentElement; -// $this->root = $this->document->documentElement; - $this->root =& $this->documentWrapper->root; -// $this->toRoot(); - $this->elements = array($this->root); - } - /** - * - * @access private - * @param $attr - * @return unknown_type - */ - public function __get($attr) { - switch($attr) { - // FIXME doesnt work at all ? - case 'length': - return $this->size(); - break; - default: - return $this->$attr; - } - } - /** - * Saves actual object to $var by reference. - * Useful when need to break chain. - * @param phpQueryObject $var - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function toReference(&$var) { - return $var = $this; - } - public function documentFragment($state = null) { - if ($state) { - phpQuery::$documents[$this->getDocumentID()]['documentFragment'] = $state; - return $this; - } - return $this->documentFragment; - } - /** - * @access private - * @TODO documentWrapper - */ - protected function isRoot( $node) { -// return $node instanceof DOMDOCUMENT || $node->tagName == 'html'; - return $node instanceof DOMDOCUMENT - || ($node instanceof DOMELEMENT && $node->tagName == 'html') - || $this->root->isSameNode($node); - } - /** - * @access private - */ - protected function stackIsRoot() { - return $this->size() == 1 && $this->isRoot($this->elements[0]); - } - /** - * Enter description here... - * NON JQUERY METHOD - * - * Watch out, it doesn't creates new instance, can be reverted with end(). - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function toRoot() { - $this->elements = array($this->root); - return $this; -// return $this->newInstance(array($this->root)); - } - /** - * Saves object's DocumentID to $var by reference. - * - * $myDocumentId; - * phpQuery::newDocument('
    ') - * ->getDocumentIDRef($myDocumentId) - * ->find('div')->... - * - * - * @param unknown_type $domId - * @see phpQuery::newDocument - * @see phpQuery::newDocumentFile - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function getDocumentIDRef(&$documentID) { - $documentID = $this->getDocumentID(); - return $this; - } - /** - * Returns object with stack set to document root. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function getDocument() { - return phpQuery::getDocument($this->getDocumentID()); - } - /** - * - * @return DOMDocument - */ - public function getDOMDocument() { - return $this->document; - } - /** - * Get object's Document ID. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function getDocumentID() { - return $this->documentID; - } - /** - * Unloads whole document from memory. - * CAUTION! None further operations will be possible on this document. - * All objects refering to it will be useless. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function unloadDocument() { - phpQuery::unloadDocuments($this->getDocumentID()); - } - public function isHTML() { - return $this->documentWrapper->isHTML; - } - public function isXHTML() { - return $this->documentWrapper->isXHTML; - } - public function isXML() { - return $this->documentWrapper->isXML; - } - /** - * Enter description here... - * - * @link http://docs.jquery.com/Ajax/serialize - * @return string - */ - public function serialize() { - return phpQuery::param($this->serializeArray()); - } - /** - * Enter description here... - * - * @link http://docs.jquery.com/Ajax/serializeArray - * @return array - */ - public function serializeArray($submit = null) { - $source = $this->filter('form, input, select, textarea') - ->find('input, select, textarea') - ->andSelf() - ->not('form'); - $return = array(); -// $source->dumpDie(); - foreach($source as $input) { - $input = phpQuery::pq($input); - if ($input->is('[disabled]')) - continue; - if (!$input->is('[name]')) - continue; - if ($input->is('[type=checkbox]') && !$input->is('[checked]')) - continue; - // jquery diff - if ($submit && $input->is('[type=submit]')) { - if ($submit instanceof DOMELEMENT && ! $input->elements[0]->isSameNode($submit)) - continue; - else if (is_string($submit) && $input->attr('name') != $submit) - continue; - } - $return[] = array( - 'name' => $input->attr('name'), - 'value' => $input->val(), - ); - } - return $return; - } - /** - * @access private - */ - protected function debug($in) { - if (! phpQuery::$debug ) - return; - print('
    ');
    -		print_r($in);
    -		// file debug
    -//		file_put_contents(dirname(__FILE__).'/phpQuery.log', print_r($in, true)."\n", FILE_APPEND);
    -		// quite handy debug trace
    -//		if ( is_array($in))
    -//			print_r(array_slice(debug_backtrace(), 3));
    -		print("
    \n"); - } - /** - * @access private - */ - protected function isRegexp($pattern) { - return in_array( - $pattern[ mb_strlen($pattern)-1 ], - array('^','*','$') - ); - } - /** - * Determines if $char is really a char. - * - * @param string $char - * @return bool - * @todo rewrite me to charcode range ! ;) - * @access private - */ - protected function isChar($char) { - return extension_loaded('mbstring') && phpQuery::$mbstringSupport - ? mb_eregi('\w', $char) - : preg_match('@\w@', $char); - } - /** - * @access private - */ - protected function parseSelector($query) { - // clean spaces - // TODO include this inside parsing ? - $query = trim( - preg_replace('@\s+@', ' ', - preg_replace('@\s*(>|\\+|~)\s*@', '\\1', $query) - ) - ); - $queries = array(array()); - if (! $query) - return $queries; - $return =& $queries[0]; - $specialChars = array('>',' '); -// $specialCharsMapping = array('/' => '>'); - $specialCharsMapping = array(); - $strlen = mb_strlen($query); - $classChars = array('.', '-'); - $pseudoChars = array('-'); - $tagChars = array('*', '|', '-'); - // split multibyte string - // http://code.google.com/p/phpquery/issues/detail?id=76 - $_query = array(); - for ($i=0; $i<$strlen; $i++) - $_query[] = mb_substr($query, $i, 1); - $query = $_query; - // it works, but i dont like it... - $i = 0; - while( $i < $strlen) { - $c = $query[$i]; - $tmp = ''; - // TAG - if ($this->isChar($c) || in_array($c, $tagChars)) { - while(isset($query[$i]) - && ($this->isChar($query[$i]) || in_array($query[$i], $tagChars))) { - $tmp .= $query[$i]; - $i++; - } - $return[] = $tmp; - // IDs - } else if ( $c == '#') { - $i++; - while( isset($query[$i]) && ($this->isChar($query[$i]) || $query[$i] == '-')) { - $tmp .= $query[$i]; - $i++; - } - $return[] = '#'.$tmp; - // SPECIAL CHARS - } else if (in_array($c, $specialChars)) { - $return[] = $c; - $i++; - // MAPPED SPECIAL MULTICHARS -// } else if ( $c.$query[$i+1] == '//') { -// $return[] = ' '; -// $i = $i+2; - // MAPPED SPECIAL CHARS - } else if ( isset($specialCharsMapping[$c])) { - $return[] = $specialCharsMapping[$c]; - $i++; - // COMMA - } else if ( $c == ',') { - $queries[] = array(); - $return =& $queries[ count($queries)-1 ]; - $i++; - while( isset($query[$i]) && $query[$i] == ' ') - $i++; - // CLASSES - } else if ($c == '.') { - while( isset($query[$i]) && ($this->isChar($query[$i]) || in_array($query[$i], $classChars))) { - $tmp .= $query[$i]; - $i++; - } - $return[] = $tmp; - // ~ General Sibling Selector - } else if ($c == '~') { - $spaceAllowed = true; - $tmp .= $query[$i++]; - while( isset($query[$i]) - && ($this->isChar($query[$i]) - || in_array($query[$i], $classChars) - || $query[$i] == '*' - || ($query[$i] == ' ' && $spaceAllowed) - )) { - if ($query[$i] != ' ') - $spaceAllowed = false; - $tmp .= $query[$i]; - $i++; - } - $return[] = $tmp; - // + Adjacent sibling selectors - } else if ($c == '+') { - $spaceAllowed = true; - $tmp .= $query[$i++]; - while( isset($query[$i]) - && ($this->isChar($query[$i]) - || in_array($query[$i], $classChars) - || $query[$i] == '*' - || ($spaceAllowed && $query[$i] == ' ') - )) { - if ($query[$i] != ' ') - $spaceAllowed = false; - $tmp .= $query[$i]; - $i++; - } - $return[] = $tmp; - // ATTRS - } else if ($c == '[') { - $stack = 1; - $tmp .= $c; - while( isset($query[++$i])) { - $tmp .= $query[$i]; - if ( $query[$i] == '[') { - $stack++; - } else if ( $query[$i] == ']') { - $stack--; - if (! $stack ) - break; - } - } - $return[] = $tmp; - $i++; - // PSEUDO CLASSES - } else if ($c == ':') { - $stack = 1; - $tmp .= $query[$i++]; - while( isset($query[$i]) && ($this->isChar($query[$i]) || in_array($query[$i], $pseudoChars))) { - $tmp .= $query[$i]; - $i++; - } - // with arguments ? - if ( isset($query[$i]) && $query[$i] == '(') { - $tmp .= $query[$i]; - $stack = 1; - while( isset($query[++$i])) { - $tmp .= $query[$i]; - if ( $query[$i] == '(') { - $stack++; - } else if ( $query[$i] == ')') { - $stack--; - if (! $stack ) - break; - } - } - $return[] = $tmp; - $i++; - } else { - $return[] = $tmp; - } - } else { - $i++; - } - } - foreach($queries as $k => $q) { - if (isset($q[0])) { - if (isset($q[0][0]) && $q[0][0] == ':') - array_unshift($queries[$k], '*'); - if ($q[0] != '>') - array_unshift($queries[$k], ' '); - } - } - return $queries; - } - /** - * Return matched DOM nodes. - * - * @param int $index - * @return array|DOMElement Single DOMElement or array of DOMElement. - */ - public function get($index = null, $callback1 = null, $callback2 = null, $callback3 = null) { - $return = isset($index) - ? (isset($this->elements[$index]) ? $this->elements[$index] : null) - : $this->elements; - // pass thou callbacks - $args = func_get_args(); - $args = array_slice($args, 1); - foreach($args as $callback) { - if (is_array($return)) - foreach($return as $k => $v) - $return[$k] = phpQuery::callbackRun($callback, array($v)); - else - $return = phpQuery::callbackRun($callback, array($return)); - } - return $return; - } - /** - * Return matched DOM nodes. - * jQuery difference. - * - * @param int $index - * @return array|string Returns string if $index != null - * @todo implement callbacks - * @todo return only arrays ? - * @todo maybe other name... - */ - public function getString($index = null, $callback1 = null, $callback2 = null, $callback3 = null) { - if ($index) - $return = $this->eq($index)->text(); - else { - $return = array(); - for($i = 0; $i < $this->size(); $i++) { - $return[] = $this->eq($i)->text(); - } - } - // pass thou callbacks - $args = func_get_args(); - $args = array_slice($args, 1); - foreach($args as $callback) { - $return = phpQuery::callbackRun($callback, array($return)); - } - return $return; - } - /** - * Return matched DOM nodes. - * jQuery difference. - * - * @param int $index - * @return array|string Returns string if $index != null - * @todo implement callbacks - * @todo return only arrays ? - * @todo maybe other name... - */ - public function getStrings($index = null, $callback1 = null, $callback2 = null, $callback3 = null) { - if ($index) - $return = $this->eq($index)->text(); - else { - $return = array(); - for($i = 0; $i < $this->size(); $i++) { - $return[] = $this->eq($i)->text(); - } - // pass thou callbacks - $args = func_get_args(); - $args = array_slice($args, 1); - } - foreach($args as $callback) { - if (is_array($return)) - foreach($return as $k => $v) - $return[$k] = phpQuery::callbackRun($callback, array($v)); - else - $return = phpQuery::callbackRun($callback, array($return)); - } - return $return; - } - /** - * Returns new instance of actual class. - * - * @param array $newStack Optional. Will replace old stack with new and move old one to history.c - */ - public function newInstance($newStack = null) { - $class = get_class($this); - // support inheritance by passing old object to overloaded constructor - $new = $class != 'phpQuery' - ? new $class($this, $this->getDocumentID()) - : new phpQueryObject($this->getDocumentID()); - $new->previous = $this; - if (is_null($newStack)) { - $new->elements = $this->elements; - if ($this->elementsBackup) - $this->elements = $this->elementsBackup; - } else if (is_string($newStack)) { - $new->elements = phpQuery::pq($newStack, $this->getDocumentID())->stack(); - } else { - $new->elements = $newStack; - } - return $new; - } - - /** - * 匹配class - * - * In the future, when PHP will support XLS 2.0, then we would do that this way: - * contains(tokenize(@class, '\s'), "something") - * @param unknown_type $class - * @param unknown_type $node - * @return boolean - * @access private - */ - protected function matchClasses($class, $node) { - // multi-class - if ( mb_strpos($class, '.', 1)) { - $classes = explode('.', substr($class, 1)); - $classesCount = count( $classes ); - $nodeClasses = explode(' ', $node->getAttribute('class') ); - $nodeClassesCount = count( $nodeClasses ); - if ( $classesCount > $nodeClassesCount ) - return false; - $diff = count( - array_diff( - $classes, - $nodeClasses - ) - ); - if (! $diff ) - return true; - // single-class - } else { - return in_array( - // strip leading dot from class name - substr($class, 1), - // get classes for element as array - explode(' ', $node->getAttribute('class') ) - ); - } - } - - /** - * @access private - */ - protected function runQuery($XQuery, $selector = null, $compare = null) { - if ($compare && ! method_exists($this, $compare)) - return false; - $stack = array(); - if (! $this->elements) - $this->debug('Stack empty, skipping...'); -// var_dump($this->elements[0]->nodeType); - // element, document - foreach($this->stack(array(1, 9, 13)) as $k => $stackNode) { - $detachAfter = false; - // to work on detached nodes we need temporary place them somewhere - // thats because context xpath queries sucks ;] - $testNode = $stackNode; - while ($testNode) { - if (! $testNode->parentNode && ! $this->isRoot($testNode)) { - $this->root->appendChild($testNode); - $detachAfter = $testNode; - break; - } - $testNode = isset($testNode->parentNode) - ? $testNode->parentNode - : null; - } - // XXX tmp ? - $xpath = $this->documentWrapper->isXHTML - ? $this->getNodeXpath($stackNode, 'html') - : $this->getNodeXpath($stackNode); - // FIXME pseudoclasses-only query, support XML - $query = $XQuery == '//' && $xpath == '/html[1]' - ? '//*' - : $xpath.$XQuery; - $this->debug("XPATH: {$query}"); - // run query, get elements - $nodes = $this->xpath->query($query); - $this->debug("QUERY FETCHED"); - if (! $nodes->length ) - $this->debug('Nothing found'); - $debug = array(); - foreach($nodes as $node) { - $matched = false; - if ( $compare) { - phpQuery::$debug ? - $this->debug("Found: ".$this->whois( $node ).", comparing with {$compare}()") - : null; - $phpQueryDebug = phpQuery::$debug; - phpQuery::$debug = false; - // TODO ??? use phpQuery::callbackRun() - if (call_user_func_array(array($this, $compare), array($selector, $node))) - $matched = true; - phpQuery::$debug = $phpQueryDebug; - } else { - $matched = true; - } - if ( $matched) { - if (phpQuery::$debug) - $debug[] = $this->whois( $node ); - $stack[] = $node; - } - } - if (phpQuery::$debug) { - $this->debug("Matched ".count($debug).": ".implode(', ', $debug)); - } - if ($detachAfter) - $this->root->removeChild($detachAfter); - } - $this->elements = $stack; - } - - /** - * Enter description here... - * css to xpath - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function find($selectors, $context = null, $noHistory = false) { - if (!$noHistory) - // backup last stack /for end()/ - $this->elementsBackup = $this->elements; - // allow to define context - // TODO combine code below with phpQuery::pq() context guessing code - // as generic function - if ($context) { - if (! is_array($context) && $context instanceof DOMELEMENT) - $this->elements = array($context); - else if (is_array($context)) { - $this->elements = array(); - foreach ($context as $c) - if ($c instanceof DOMELEMENT) - $this->elements[] = $c; - } else if ( $context instanceof self ) - $this->elements = $context->elements; - } - $queries = $this->parseSelector($selectors); - $this->debug(array('FIND', $selectors, $queries)); - $XQuery = ''; - // remember stack state because of multi-queries - $oldStack = $this->elements; - // here we will be keeping found elements - $stack = array(); - foreach($queries as $selector) { - $this->elements = $oldStack; - $delimiterBefore = false; - foreach($selector as $s) { - // TAG - $isTag = extension_loaded('mbstring') && phpQuery::$mbstringSupport - ? mb_ereg_match('^[\w|\||-]+$', $s) || $s == '*' - : preg_match('@^[\w|\||-]+$@', $s) || $s == '*'; - if ($isTag) { - if ($this->isXML()) { - // namespace support - if (mb_strpos($s, '|') !== false) { - $ns = $tag = null; - list($ns, $tag) = explode('|', $s); - $XQuery .= "$ns:$tag"; - } else if ($s == '*') { - $XQuery .= "*"; - } else { - $XQuery .= "*[local-name()='$s']"; - } - } else { - $XQuery .= $s; - } - // ID - } else if ($s[0] == '#') { - if ($delimiterBefore) - $XQuery .= '*'; - $XQuery .= "[@id='".substr($s, 1)."']"; - // ATTRIBUTES - } else if ($s[0] == '[') { - if ($delimiterBefore) - $XQuery .= '*'; - // strip side brackets - $attr = trim($s, ']['); - $execute = false; - // attr with specifed value - if (mb_strpos($s, '=')) { - $value = null; - list($attr, $value) = explode('=', $attr); - $value = trim($value, "'\""); - if ($this->isRegexp($attr)) { - // cut regexp character - $attr = substr($attr, 0, -1); - $execute = true; - $XQuery .= "[@{$attr}]"; - } else { - $XQuery .= "[@{$attr}='{$value}']"; - } - // attr without specified value - } else { - $XQuery .= "[@{$attr}]"; - } - if ($execute) { - $this->runQuery($XQuery, $s, 'is'); - $XQuery = ''; - if (! $this->length()) - break; - } - // CLASSES - } else if ($s[0] == '.') { - // TODO use return $this->find("./self::*[contains(concat(\" \",@class,\" \"), \" $class \")]"); - // thx wizDom ;) - if ($delimiterBefore) - $XQuery .= '*'; - $XQuery .= '[@class]'; - $this->runQuery($XQuery, $s, 'matchClasses'); - $XQuery = ''; - if (! $this->length() ) - break; - // ~ General Sibling Selector - } else if ($s[0] == '~') { - $this->runQuery($XQuery); - $XQuery = ''; - $this->elements = $this - ->siblings( - substr($s, 1) - )->elements; - if (! $this->length() ) - break; - // + Adjacent sibling selectors - } else if ($s[0] == '+') { - // TODO /following-sibling:: - $this->runQuery($XQuery); - $XQuery = ''; - $subSelector = substr($s, 1); - $subElements = $this->elements; - $this->elements = array(); - foreach($subElements as $node) { - // search first DOMElement sibling - $test = $node->nextSibling; - while($test && ! ($test instanceof DOMELEMENT)) - $test = $test->nextSibling; - if ($test && $this->is($subSelector, $test)) - $this->elements[] = $test; - } - if (! $this->length() ) - break; - // PSEUDO CLASSES - } else if ($s[0] == ':') { - // TODO optimization for :first :last - if ($XQuery) { - $this->runQuery($XQuery); - $XQuery = ''; - } - if (! $this->length()) - break; - $this->pseudoClasses($s); - if (! $this->length()) - break; - // DIRECT DESCENDANDS - } else if ($s == '>') { - $XQuery .= '/'; - $delimiterBefore = 2; - // ALL DESCENDANDS - } else if ($s == ' ') { - $XQuery .= '//'; - $delimiterBefore = 2; - // ERRORS - } else { - phpQuery::debug("Unrecognized token '$s'"); - } - $delimiterBefore = $delimiterBefore === 2; - } - // run query if any - if ($XQuery && $XQuery != '//') { - $this->runQuery($XQuery); - $XQuery = ''; - } - foreach($this->elements as $node) - if (! $this->elementsContainsNode($node, $stack)) - $stack[] = $node; - } - $this->elements = $stack; - return $this->newInstance(); - } - /** - * @todo create API for classes with pseudoselectors - * @access private - */ - protected function pseudoClasses($class) { - // TODO clean args parsing ? - $class = ltrim($class, ':'); - $haveArgs = mb_strpos($class, '('); - if ($haveArgs !== false) { - $args = substr($class, $haveArgs+1, -1); - $class = substr($class, 0, $haveArgs); - } - switch($class) { - case 'even': - case 'odd': - $stack = array(); - foreach($this->elements as $i => $node) { - if ($class == 'even' && ($i%2) == 0) - $stack[] = $node; - else if ( $class == 'odd' && $i % 2 ) - $stack[] = $node; - } - $this->elements = $stack; - break; - case 'eq': - $k = intval($args); - $this->elements = isset( $this->elements[$k] ) - ? array( $this->elements[$k] ) - : array(); - break; - case 'gt': - $this->elements = array_slice($this->elements, $args+1); - break; - case 'lt': - $this->elements = array_slice($this->elements, 0, $args+1); - break; - case 'first': - if (isset($this->elements[0])) - $this->elements = array($this->elements[0]); - break; - case 'last': - if ($this->elements) - $this->elements = array($this->elements[count($this->elements)-1]); - break; - /*case 'parent': - $stack = array(); - foreach($this->elements as $node) { - if ( $node->childNodes->length ) - $stack[] = $node; - } - $this->elements = $stack; - break;*/ - case 'contains': - $text = trim($args, "\"'"); - $stack = array(); - foreach($this->elements as $node) { - if (mb_stripos($node->textContent, $text) === false) - continue; - $stack[] = $node; - } - $this->elements = $stack; - break; - case 'not': - $selector = self::unQuote($args); - $this->elements = $this->not($selector)->stack(); - break; - case 'slice': - // TODO jQuery difference ? - $args = explode(',', - str_replace(', ', ',', trim($args, "\"'")) - ); - $start = $args[0]; - $end = isset($args[1]) - ? $args[1] - : null; - if ($end > 0) - $end = $end-$start; - $this->elements = array_slice($this->elements, $start, $end); - break; - case 'has': - $selector = trim($args, "\"'"); - $stack = array(); - foreach($this->stack(1) as $el) { - if ($this->find($selector, $el, true)->length) - $stack[] = $el; - } - $this->elements = $stack; - break; - case 'submit': - case 'reset': - $this->elements = phpQuery::merge( - $this->map(array($this, 'is'), - "input[type=$class]", new CallbackParam() - ), - $this->map(array($this, 'is'), - "button[type=$class]", new CallbackParam() - ) - ); - break; -// $stack = array(); -// foreach($this->elements as $node) -// if ($node->is('input[type=submit]') || $node->is('button[type=submit]')) -// $stack[] = $el; -// $this->elements = $stack; - case 'input': - $this->elements = $this->map( - array($this, 'is'), - 'input', new CallbackParam() - )->elements; - break; - case 'password': - case 'checkbox': - case 'radio': - case 'hidden': - case 'image': - case 'file': - $this->elements = $this->map( - array($this, 'is'), - "input[type=$class]", new CallbackParam() - )->elements; - break; - case 'parent': - $this->elements = $this->map( - create_function('$node', ' - return $node instanceof DOMELEMENT && $node->childNodes->length - ? $node : null;') - )->elements; - break; - case 'empty': - $this->elements = $this->map( - create_function('$node', ' - return $node instanceof DOMELEMENT && $node->childNodes->length - ? null : $node;') - )->elements; - break; - case 'disabled': - case 'selected': - case 'checked': - $this->elements = $this->map( - array($this, 'is'), - "[$class]", new CallbackParam() - )->elements; - break; - case 'enabled': - $this->elements = $this->map( - create_function('$node', ' - return pq($node)->not(":disabled") ? $node : null;') - )->elements; - break; - case 'header': - $this->elements = $this->map( - create_function('$node', - '$isHeader = isset($node->tagName) && in_array($node->tagName, array( - "h1", "h2", "h3", "h4", "h5", "h6", "h7" - )); - return $isHeader - ? $node - : null;') - )->elements; -// $this->elements = $this->map( -// create_function('$node', '$node = pq($node); -// return $node->is("h1") -// || $node->is("h2") -// || $node->is("h3") -// || $node->is("h4") -// || $node->is("h5") -// || $node->is("h6") -// || $node->is("h7") -// ? $node -// : null;') -// )->elements; - break; - case 'only-child': - $this->elements = $this->map( - create_function('$node', - 'return pq($node)->siblings()->size() == 0 ? $node : null;') - )->elements; - break; - case 'first-child': - $this->elements = $this->map( - create_function('$node', 'return pq($node)->prevAll()->size() == 0 ? $node : null;') - )->elements; - break; - case 'last-child': - $this->elements = $this->map( - create_function('$node', 'return pq($node)->nextAll()->size() == 0 ? $node : null;') - )->elements; - break; - case 'nth-child': - $param = trim($args, "\"'"); - if (! $param) - break; - // nth-child(n+b) to nth-child(1n+b) - if ($param{0} == 'n') - $param = '1'.$param; - // :nth-child(index/even/odd/equation) - if ($param == 'even' || $param == 'odd') - $mapped = $this->map( - create_function('$node, $param', - '$index = pq($node)->prevAll()->size()+1; - if ($param == "even" && ($index%2) == 0) - return $node; - else if ($param == "odd" && $index%2 == 1) - return $node; - else - return null;'), - new CallbackParam(), $param - ); - else if (mb_strlen($param) > 1 && $param{1} == 'n') - // an+b - $mapped = $this->map( - create_function('$node, $param', - '$prevs = pq($node)->prevAll()->size(); - $index = 1+$prevs; - $b = mb_strlen($param) > 3 - ? $param{3} - : 0; - $a = $param{0}; - if ($b && $param{2} == "-") - $b = -$b; - if ($a > 0) { - return ($index-$b)%$a == 0 - ? $node - : null; - phpQuery::debug($a."*".floor($index/$a)."+$b-1 == ".($a*floor($index/$a)+$b-1)." ?= $prevs"); - return $a*floor($index/$a)+$b-1 == $prevs - ? $node - : null; - } else if ($a == 0) - return $index == $b - ? $node - : null; - else - // negative value - return $index <= $b - ? $node - : null; -// if (! $b) -// return $index%$a == 0 -// ? $node -// : null; -// else -// return ($index-$b)%$a == 0 -// ? $node -// : null; - '), - new CallbackParam(), $param - ); - else - // index - $mapped = $this->map( - create_function('$node, $index', - '$prevs = pq($node)->prevAll()->size(); - if ($prevs && $prevs == $index-1) - return $node; - else if (! $prevs && $index == 1) - return $node; - else - return null;'), - new CallbackParam(), $param - ); - $this->elements = $mapped->elements; - break; - default: - $this->debug("Unknown pseudoclass '{$class}', skipping..."); - } - } - /** - * @access private - */ - protected function __pseudoClassParam($paramsString) { - // TODO; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function is($selector, $nodes = null) { - phpQuery::debug(array("Is:", $selector)); - if (! $selector) - return false; - $oldStack = $this->elements; - $returnArray = false; - if ($nodes && is_array($nodes)) { - $this->elements = $nodes; - } else if ($nodes) - $this->elements = array($nodes); - $this->filter($selector, true); - $stack = $this->elements; - $this->elements = $oldStack; - if ($nodes) - return $stack ? $stack : null; - return (bool)count($stack); - } - /** - * Enter description here... - * jQuery difference. - * - * Callback: - * - $index int - * - $node DOMNode - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @link http://docs.jquery.com/Traversing/filter - */ - public function filterCallback($callback, $_skipHistory = false) { - if (! $_skipHistory) { - $this->elementsBackup = $this->elements; - $this->debug("Filtering by callback"); - } - $newStack = array(); - foreach($this->elements as $index => $node) { - $result = phpQuery::callbackRun($callback, array($index, $node)); - if (is_null($result) || (! is_null($result) && $result)) - $newStack[] = $node; - } - $this->elements = $newStack; - return $_skipHistory - ? $this - : $this->newInstance(); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @link http://docs.jquery.com/Traversing/filter - */ - public function filter($selectors, $_skipHistory = false) { - if ($selectors instanceof Callback OR $selectors instanceof Closure) - return $this->filterCallback($selectors, $_skipHistory); - if (! $_skipHistory) - $this->elementsBackup = $this->elements; - $notSimpleSelector = array(' ', '>', '~', '+', '/'); - if (! is_array($selectors)) - $selectors = $this->parseSelector($selectors); - if (! $_skipHistory) - $this->debug(array("Filtering:", $selectors)); - $finalStack = array(); - foreach($selectors as $selector) { - $stack = array(); - if (! $selector) - break; - // avoid first space or / - if (in_array($selector[0], $notSimpleSelector)) - $selector = array_slice($selector, 1); - // PER NODE selector chunks - foreach($this->stack() as $node) { - $break = false; - foreach($selector as $s) { - if (!($node instanceof DOMELEMENT)) { - // all besides DOMElement - if ( $s[0] == '[') { - $attr = trim($s, '[]'); - if ( mb_strpos($attr, '=')) { - list( $attr, $val ) = explode('=', $attr); - if ($attr == 'nodeType' && $node->nodeType != $val) - $break = true; - } - } else - $break = true; - } else { - // DOMElement only - // ID - if ( $s[0] == '#') { - if ( $node->getAttribute('id') != substr($s, 1) ) - $break = true; - // CLASSES - } else if ( $s[0] == '.') { - if (! $this->matchClasses( $s, $node ) ) - $break = true; - // ATTRS - } else if ( $s[0] == '[') { - // strip side brackets - $attr = trim($s, '[]'); - if (mb_strpos($attr, '=')) { - list($attr, $val) = explode('=', $attr); - $val = self::unQuote($val); - if ($attr == 'nodeType') { - if ($val != $node->nodeType) - $break = true; - } else if ($this->isRegexp($attr)) { - $val = extension_loaded('mbstring') && phpQuery::$mbstringSupport - ? quotemeta(trim($val, '"\'')) - : preg_quote(trim($val, '"\''), '@'); - // switch last character - switch( substr($attr, -1)) { - // quotemeta used insted of preg_quote - // http://code.google.com/p/phpquery/issues/detail?id=76 - case '^': - $pattern = '^'.$val; - break; - case '*': - $pattern = '.*'.$val.'.*'; - break; - case '$': - $pattern = '.*'.$val.'$'; - break; - } - // cut last character - $attr = substr($attr, 0, -1); - $isMatch = extension_loaded('mbstring') && phpQuery::$mbstringSupport - ? mb_ereg_match($pattern, $node->getAttribute($attr)) - : preg_match("@{$pattern}@", $node->getAttribute($attr)); - if (! $isMatch) - $break = true; - } else if ($node->getAttribute($attr) != $val) - $break = true; - } else if (! $node->hasAttribute($attr)) - $break = true; - // PSEUDO CLASSES - } else if ( $s[0] == ':') { - // skip - // TAG - } else if (trim($s)) { - if ($s != '*') { - // TODO namespaces - if (isset($node->tagName)) { - if ($node->tagName != $s) - $break = true; - } else if ($s == 'html' && ! $this->isRoot($node)) - $break = true; - } - // AVOID NON-SIMPLE SELECTORS - } else if (in_array($s, $notSimpleSelector)) { - $break = true; - $this->debug(array('Skipping non simple selector', $selector)); - } - } - if ($break) - break; - } - // if element passed all chunks of selector - add it to new stack - if (! $break ) - $stack[] = $node; - } - $tmpStack = $this->elements; - $this->elements = $stack; - // PER ALL NODES selector chunks - foreach($selector as $s) - // PSEUDO CLASSES - if ($s[0] == ':') - $this->pseudoClasses($s); - foreach($this->elements as $node) - // XXX it should be merged without duplicates - // but jQuery doesnt do that - $finalStack[] = $node; - $this->elements = $tmpStack; - } - $this->elements = $finalStack; - if ($_skipHistory) { - return $this; - } else { - $this->debug("Stack length after filter(): ".count($finalStack)); - return $this->newInstance(); - } - } - /** - * - * @param $value - * @return unknown_type - * @TODO implement in all methods using passed parameters - */ - protected static function unQuote($value) { - return $value[0] == '\'' || $value[0] == '"' - ? substr($value, 1, -1) - : $value; - } - /** - * Enter description here... - * - * @link http://docs.jquery.com/Ajax/load - * @return phpQuery|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo Support $selector - */ - public function load($url, $data = null, $callback = null) { - if ($data && ! is_array($data)) { - $callback = $data; - $data = null; - } - if (mb_strpos($url, ' ') !== false) { - $matches = null; - if (extension_loaded('mbstring') && phpQuery::$mbstringSupport) - mb_ereg('^([^ ]+) (.*)$', $url, $matches); - else - preg_match('^([^ ]+) (.*)$', $url, $matches); - $url = $matches[1]; - $selector = $matches[2]; - // FIXME this sucks, pass as callback param - $this->_loadSelector = $selector; - } - $ajax = array( - 'url' => $url, - 'type' => $data ? 'POST' : 'GET', - 'data' => $data, - 'complete' => $callback, - 'success' => array($this, '__loadSuccess') - ); - phpQuery::ajax($ajax); - return $this; - } - /** - * @access private - * @param $html - * @return unknown_type - */ - public function __loadSuccess($html) { - if ($this->_loadSelector) { - $html = phpQuery::newDocument($html)->find($this->_loadSelector); - unset($this->_loadSelector); - } - foreach($this->stack(1) as $node) { - phpQuery::pq($node, $this->getDocumentID()) - ->markup($html); - } - } - /** - * Enter description here... - * - * @return phpQuery|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo - */ - public function css() { - // TODO - return $this; - } - /** - * @todo - * - */ - public function show(){ - // TODO - return $this; - } - /** - * @todo - * - */ - public function hide(){ - // TODO - return $this; - } - /** - * Trigger a type of event on every matched element. - * - * @param unknown_type $type - * @param unknown_type $data - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @TODO support more than event in $type (space-separated) - */ - public function trigger($type, $data = array()) { - foreach($this->elements as $node) - phpQueryEvents::trigger($this->getDocumentID(), $type, $data, $node); - return $this; - } - /** - * This particular method triggers all bound event handlers on an element (for a specific event type) WITHOUT executing the browsers default actions. - * - * @param unknown_type $type - * @param unknown_type $data - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @TODO - */ - public function triggerHandler($type, $data = array()) { - // TODO; - } - /** - * Binds a handler to one or more events (like click) for each matched element. - * Can also bind custom events. - * - * @param unknown_type $type - * @param unknown_type $data Optional - * @param unknown_type $callback - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @TODO support '!' (exclusive) events - * @TODO support more than event in $type (space-separated) - */ - public function bind($type, $data, $callback = null) { - // TODO check if $data is callable, not using is_callable - if (! isset($callback)) { - $callback = $data; - $data = null; - } - foreach($this->elements as $node) - phpQueryEvents::add($this->getDocumentID(), $node, $type, $data, $callback); - return $this; - } - /** - * Enter description here... - * - * @param unknown_type $type - * @param unknown_type $callback - * @return unknown - * @TODO namespace events - * @TODO support more than event in $type (space-separated) - */ - public function unbind($type = null, $callback = null) { - foreach($this->elements as $node) - phpQueryEvents::remove($this->getDocumentID(), $node, $type, $callback); - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function change($callback = null) { - if ($callback) - return $this->bind('change', $callback); - return $this->trigger('change'); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function submit($callback = null) { - if ($callback) - return $this->bind('submit', $callback); - return $this->trigger('submit'); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function click($callback = null) { - if ($callback) - return $this->bind('click', $callback); - return $this->trigger('click'); - } - /** - * Enter description here... - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrapAllOld($wrapper) { - $wrapper = pq($wrapper)->_clone(); - if (! $wrapper->length() || ! $this->length() ) - return $this; - $wrapper->insertBefore($this->elements[0]); - $deepest = $wrapper->elements[0]; - while($deepest->firstChild && $deepest->firstChild instanceof DOMELEMENT) - $deepest = $deepest->firstChild; - pq($deepest)->append($this); - return $this; - } - /** - * Enter description here... - * - * TODO testme... - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrapAll($wrapper) { - if (! $this->length()) - return $this; - return phpQuery::pq($wrapper, $this->getDocumentID()) - ->clone() - ->insertBefore($this->get(0)) - ->map(array($this, '___wrapAllCallback')) - ->append($this); - } - /** - * - * @param $node - * @return unknown_type - * @access private - */ - public function ___wrapAllCallback($node) { - $deepest = $node; - while($deepest->firstChild && $deepest->firstChild instanceof DOMELEMENT) - $deepest = $deepest->firstChild; - return $deepest; - } - /** - * Enter description here... - * NON JQUERY METHOD - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrapAllPHP($codeBefore, $codeAfter) { - return $this - ->slice(0, 1) - ->beforePHP($codeBefore) - ->end() - ->slice(-1) - ->afterPHP($codeAfter) - ->end(); - } - /** - * Enter description here... - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrap($wrapper) { - foreach($this->stack() as $node) - phpQuery::pq($node, $this->getDocumentID())->wrapAll($wrapper); - return $this; - } - /** - * Enter description here... - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrapPHP($codeBefore, $codeAfter) { - foreach($this->stack() as $node) - phpQuery::pq($node, $this->getDocumentID())->wrapAllPHP($codeBefore, $codeAfter); - return $this; - } - /** - * Enter description here... - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrapInner($wrapper) { - foreach($this->stack() as $node) - phpQuery::pq($node, $this->getDocumentID())->contents()->wrapAll($wrapper); - return $this; - } - /** - * Enter description here... - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function wrapInnerPHP($codeBefore, $codeAfter) { - foreach($this->stack(1) as $node) - phpQuery::pq($node, $this->getDocumentID())->contents() - ->wrapAllPHP($codeBefore, $codeAfter); - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @testme Support for text nodes - */ - public function contents() { - $stack = array(); - foreach($this->stack(1) as $el) { - // FIXME (fixed) http://code.google.com/p/phpquery/issues/detail?id=56 -// if (! isset($el->childNodes)) -// continue; - foreach($el->childNodes as $node) { - $stack[] = $node; - } - } - return $this->newInstance($stack); - } - /** - * Enter description here... - * - * jQuery difference. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function contentsUnwrap() { - foreach($this->stack(1) as $node) { - if (! $node->parentNode ) - continue; - $childNodes = array(); - // any modification in DOM tree breaks childNodes iteration, so cache them first - foreach($node->childNodes as $chNode ) - $childNodes[] = $chNode; - foreach($childNodes as $chNode ) -// $node->parentNode->appendChild($chNode); - $node->parentNode->insertBefore($chNode, $node); - $node->parentNode->removeChild($node); - } - return $this; - } - /** - * Enter description here... - * - * jQuery difference. - */ - public function switchWith($markup) { - $markup = pq($markup, $this->getDocumentID()); - $content = null; - foreach($this->stack(1) as $node) { - pq($node) - ->contents()->toReference($content)->end() - ->replaceWith($markup->clone()->append($content)); - } - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function eq($num) { - $oldStack = $this->elements; - $this->elementsBackup = $this->elements; - $this->elements = array(); - if ( isset($oldStack[$num]) ) - $this->elements[] = $oldStack[$num]; - return $this->newInstance(); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function size() { - return count($this->elements); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @deprecated Use length as attribute - */ - public function length() { - return $this->size(); - } - public function count() { - return $this->size(); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo $level - */ - public function end($level = 1) { -// $this->elements = array_pop( $this->history ); -// return $this; -// $this->previous->DOM = $this->DOM; -// $this->previous->XPath = $this->XPath; - return $this->previous - ? $this->previous - : $this; - } - /** - * Enter description here... - * Normal use ->clone() . - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @access private - */ - public function _clone() { - $newStack = array(); - //pr(array('copy... ', $this->whois())); - //$this->dumpHistory('copy'); - $this->elementsBackup = $this->elements; - foreach($this->elements as $node) { - $newStack[] = $node->cloneNode(true); - } - $this->elements = $newStack; - return $this->newInstance(); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function replaceWithPHP($code) { - return $this->replaceWith(phpQuery::php($code)); - } - /** - * Enter description here... - * - * @param String|phpQuery $content - * @link http://docs.jquery.com/Manipulation/replaceWith#content - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function replaceWith($content) { - return $this->after($content)->remove(); - } - /** - * Enter description here... - * - * @param String $selector - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo this works ? - */ - public function replaceAll($selector) { - foreach(phpQuery::pq($selector, $this->getDocumentID()) as $node) - phpQuery::pq($node, $this->getDocumentID()) - ->after($this->_clone()) - ->remove(); - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function remove($selector = null) { - $loop = $selector - ? $this->filter($selector)->elements - : $this->elements; - foreach($loop as $node) { - if (! $node->parentNode ) - continue; - if (isset($node->tagName)) - $this->debug("Removing '{$node->tagName}'"); - $node->parentNode->removeChild($node); - // Mutation event - $event = new DOMEvent(array( - 'target' => $node, - 'type' => 'DOMNodeRemoved' - )); - phpQueryEvents::trigger($this->getDocumentID(), - $event->type, array($event), $node - ); - } - return $this; - } - protected function markupEvents($newMarkup, $oldMarkup, $node) { - if ($node->tagName == 'textarea' && $newMarkup != $oldMarkup) { - $event = new DOMEvent(array( - 'target' => $node, - 'type' => 'change' - )); - phpQueryEvents::trigger($this->getDocumentID(), - $event->type, array($event), $node - ); - } - } - /** - * jQuey difference - * - * @param $markup - * @return unknown_type - * @TODO trigger change event for textarea - */ - public function markup($markup = null, $callback1 = null, $callback2 = null, $callback3 = null) { - $args = func_get_args(); - if ($this->documentWrapper->isXML) - return call_user_func_array(array($this, 'xml'), $args); - else - return call_user_func_array(array($this, 'html'), $args); - } - /** - * jQuey difference - * - * @param $markup - * @return unknown_type - */ - public function markupOuter($callback1 = null, $callback2 = null, $callback3 = null) { - $args = func_get_args(); - if ($this->documentWrapper->isXML) - return call_user_func_array(array($this, 'xmlOuter'), $args); - else - return call_user_func_array(array($this, 'htmlOuter'), $args); - } - /** - * Enter description here... - * - * @param unknown_type $html - * @return string|phpQuery|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @TODO force html result - */ - public function html($html = null, $callback1 = null, $callback2 = null, $callback3 = null) { - if (isset($html)) { - // INSERT - $nodes = $this->documentWrapper->import($html); - $this->empty(); - foreach($this->stack(1) as $alreadyAdded => $node) { - // for now, limit events for textarea - if (($this->isXHTML() || $this->isHTML()) && $node->tagName == 'textarea') - $oldHtml = pq($node, $this->getDocumentID())->markup(); - foreach($nodes as $newNode) { - $node->appendChild($alreadyAdded - ? $newNode->cloneNode(true) - : $newNode - ); - } - // for now, limit events for textarea - if (($this->isXHTML() || $this->isHTML()) && $node->tagName == 'textarea') - $this->markupEvents($html, $oldHtml, $node); - } - return $this; - } else { - // FETCH - $return = $this->documentWrapper->markup($this->elements, true); - $args = func_get_args(); - foreach(array_slice($args, 1) as $callback) { - $return = phpQuery::callbackRun($callback, array($return)); - } - return $return; - } - } - /** - * @TODO force xml result - */ - public function xml($xml = null, $callback1 = null, $callback2 = null, $callback3 = null) { - $args = func_get_args(); - return call_user_func_array(array($this, 'html'), $args); - } - /** - * Enter description here... - * @TODO force html result - * - * @return String - */ - public function htmlOuter($callback1 = null, $callback2 = null, $callback3 = null) { - $markup = $this->documentWrapper->markup($this->elements); - // pass thou callbacks - $args = func_get_args(); - foreach($args as $callback) { - $markup = phpQuery::callbackRun($callback, array($markup)); - } - return $markup; - } - /** - * @TODO force xml result - */ - public function xmlOuter($callback1 = null, $callback2 = null, $callback3 = null) { - $args = func_get_args(); - return call_user_func_array(array($this, 'htmlOuter'), $args); - } - public function __toString() { - return $this->markupOuter(); - } - /** - * Just like html(), but returns markup with VALID (dangerous) PHP tags. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo support returning markup with PHP tags when called without param - */ - public function php($code = null) { - return $this->markupPHP($code); - } - /** - * Enter description here... - * - * @param $code - * @return unknown_type - */ - public function markupPHP($code = null) { - return isset($code) - ? $this->markup(phpQuery::php($code)) - : phpQuery::markupToPHP($this->markup()); - } - /** - * Enter description here... - * - * @param $code - * @return unknown_type - */ - public function markupOuterPHP() { - return phpQuery::markupToPHP($this->markupOuter()); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function children($selector = null) { - $stack = array(); - foreach($this->stack(1) as $node) { -// foreach($node->getElementsByTagName('*') as $newNode) { - foreach($node->childNodes as $newNode) { - if ($newNode->nodeType != 1) - continue; - if ($selector && ! $this->is($selector, $newNode)) - continue; - if ($this->elementsContainsNode($newNode, $stack)) - continue; - $stack[] = $newNode; - } - } - $this->elementsBackup = $this->elements; - $this->elements = $stack; - return $this->newInstance(); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function ancestors($selector = null) { - return $this->children( $selector ); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function append( $content) { - return $this->insert($content, __FUNCTION__); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function appendPHP( $content) { - return $this->insert("", 'append'); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function appendTo( $seletor) { - return $this->insert($seletor, __FUNCTION__); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function prepend( $content) { - return $this->insert($content, __FUNCTION__); - } - /** - * Enter description here... - * - * @todo accept many arguments, which are joined, arrays maybe also - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function prependPHP( $content) { - return $this->insert("", 'prepend'); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function prependTo( $seletor) { - return $this->insert($seletor, __FUNCTION__); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function before($content) { - return $this->insert($content, __FUNCTION__); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function beforePHP( $content) { - return $this->insert("", 'before'); - } - /** - * Enter description here... - * - * @param String|phpQuery - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function insertBefore( $seletor) { - return $this->insert($seletor, __FUNCTION__); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function after( $content) { - return $this->insert($content, __FUNCTION__); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function afterPHP( $content) { - return $this->insert("", 'after'); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function insertAfter( $seletor) { - return $this->insert($seletor, __FUNCTION__); - } - /** - * Internal insert method. Don't use it. - * - * @param unknown_type $target - * @param unknown_type $type - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @access private - */ - public function insert($target, $type) { - $this->debug("Inserting data with '{$type}'"); - $to = false; - switch( $type) { - case 'appendTo': - case 'prependTo': - case 'insertBefore': - case 'insertAfter': - $to = true; - } - switch(gettype($target)) { - case 'string': - $insertFrom = $insertTo = array(); - if ($to) { - // INSERT TO - $insertFrom = $this->elements; - if (phpQuery::isMarkup($target)) { - // $target is new markup, import it - $insertTo = $this->documentWrapper->import($target); - // insert into selected element - } else { - // $tagret is a selector - $thisStack = $this->elements; - $this->toRoot(); - $insertTo = $this->find($target)->elements; - $this->elements = $thisStack; - } - } else { - // INSERT FROM - $insertTo = $this->elements; - $insertFrom = $this->documentWrapper->import($target); - } - break; - case 'object': - $insertFrom = $insertTo = array(); - // phpQuery - if ($target instanceof self) { - if ($to) { - $insertTo = $target->elements; - if ($this->documentFragment && $this->stackIsRoot()) - // get all body children -// $loop = $this->find('body > *')->elements; - // TODO test it, test it hard... -// $loop = $this->newInstance($this->root)->find('> *')->elements; - $loop = $this->root->childNodes; - else - $loop = $this->elements; - // import nodes if needed - $insertFrom = $this->getDocumentID() == $target->getDocumentID() - ? $loop - : $target->documentWrapper->import($loop); - } else { - $insertTo = $this->elements; - if ( $target->documentFragment && $target->stackIsRoot() ) - // get all body children -// $loop = $target->find('body > *')->elements; - $loop = $target->root->childNodes; - else - $loop = $target->elements; - // import nodes if needed - $insertFrom = $this->getDocumentID() == $target->getDocumentID() - ? $loop - : $this->documentWrapper->import($loop); - } - // DOMNODE - } elseif ($target instanceof DOMNODE) { - // import node if needed -// if ( $target->ownerDocument != $this->DOM ) -// $target = $this->DOM->importNode($target, true); - if ( $to) { - $insertTo = array($target); - if ($this->documentFragment && $this->stackIsRoot()) - // get all body children - $loop = $this->root->childNodes; -// $loop = $this->find('body > *')->elements; - else - $loop = $this->elements; - foreach($loop as $fromNode) - // import nodes if needed - $insertFrom[] = ! $fromNode->ownerDocument->isSameNode($target->ownerDocument) - ? $target->ownerDocument->importNode($fromNode, true) - : $fromNode; - } else { - // import node if needed - if (! $target->ownerDocument->isSameNode($this->document)) - $target = $this->document->importNode($target, true); - $insertTo = $this->elements; - $insertFrom[] = $target; - } - } - break; - } - phpQuery::debug("From ".count($insertFrom)."; To ".count($insertTo)." nodes"); - foreach($insertTo as $insertNumber => $toNode) { - // we need static relative elements in some cases - switch( $type) { - case 'prependTo': - case 'prepend': - $firstChild = $toNode->firstChild; - break; - case 'insertAfter': - case 'after': - $nextSibling = $toNode->nextSibling; - break; - } - foreach($insertFrom as $fromNode) { - // clone if inserted already before - $insert = $insertNumber - ? $fromNode->cloneNode(true) - : $fromNode; - switch($type) { - case 'appendTo': - case 'append': -// $toNode->insertBefore( -// $fromNode, -// $toNode->lastChild->nextSibling -// ); - $toNode->appendChild($insert); - $eventTarget = $insert; - break; - case 'prependTo': - case 'prepend': - $toNode->insertBefore( - $insert, - $firstChild - ); - break; - case 'insertBefore': - case 'before': - if (! $toNode->parentNode) - throw new Exception("No parentNode, can't do {$type}()"); - else - $toNode->parentNode->insertBefore( - $insert, - $toNode - ); - break; - case 'insertAfter': - case 'after': - if (! $toNode->parentNode) - throw new Exception("No parentNode, can't do {$type}()"); - else - $toNode->parentNode->insertBefore( - $insert, - $nextSibling - ); - break; - } - // Mutation event - $event = new DOMEvent(array( - 'target' => $insert, - 'type' => 'DOMNodeInserted' - )); - phpQueryEvents::trigger($this->getDocumentID(), - $event->type, array($event), $insert - ); - } - } - return $this; - } - /** - * Enter description here... - * - * @return Int - */ - public function index($subject) { - $index = -1; - $subject = $subject instanceof phpQueryObject - ? $subject->elements[0] - : $subject; - foreach($this->newInstance() as $k => $node) { - if ($node->isSameNode($subject)) - $index = $k; - } - return $index; - } - /** - * Enter description here... - * - * @param unknown_type $start - * @param unknown_type $end - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @testme - */ - public function slice($start, $end = null) { -// $last = count($this->elements)-1; -// $end = $end -// ? min($end, $last) -// : $last; -// if ($start < 0) -// $start = $last+$start; -// if ($start > $last) -// return array(); - if ($end > 0) - $end = $end-$start; - return $this->newInstance( - array_slice($this->elements, $start, $end) - ); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function reverse() { - $this->elementsBackup = $this->elements; - $this->elements = array_reverse($this->elements); - return $this->newInstance(); - } - /** - * Return joined text content. - * @return String - */ - public function text($text = null, $callback1 = null, $callback2 = null, $callback3 = null) { - if (isset($text)) - return $this->html(htmlspecialchars($text)); - $args = func_get_args(); - $args = array_slice($args, 1); - $return = ''; - foreach($this->elements as $node) { - $text = $node->textContent; - if (count($this->elements) > 1 && $text) - $text .= "\n"; - foreach($args as $callback) { - $text = phpQuery::callbackRun($callback, array($text)); - } - $return .= $text; - } - return $return; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function plugin($class, $file = null) { - phpQuery::plugin($class, $file); - return $this; - } - /** - * Deprecated, use $pq->plugin() instead. - * - * @deprecated - * @param $class - * @param $file - * @return unknown_type - */ - public static function extend($class, $file = null) { - return $this->plugin($class, $file); - } - /** - * - * @access private - * @param $method - * @param $args - * @return unknown_type - */ - public function __call($method, $args) { - $aliasMethods = array('clone', 'empty'); - if (isset(phpQuery::$extendMethods[$method])) { - array_unshift($args, $this); - return phpQuery::callbackRun( - phpQuery::$extendMethods[$method], $args - ); - } else if (isset(phpQuery::$pluginsMethods[$method])) { - array_unshift($args, $this); - $class = phpQuery::$pluginsMethods[$method]; - $realClass = "phpQueryObjectPlugin_$class"; - $return = call_user_func_array( - array($realClass, $method), - $args - ); - // XXX deprecate ? - return is_null($return) - ? $this - : $return; - } else if (in_array($method, $aliasMethods)) { - return call_user_func_array(array($this, '_'.$method), $args); - } else - throw new Exception("Method '{$method}' doesnt exist"); - } - /** - * Safe rename of next(). - * - * Use it ONLY when need to call next() on an iterated object (in same time). - * Normaly there is no need to do such thing ;) - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @access private - */ - public function _next($selector = null) { - return $this->newInstance( - $this->getElementSiblings('nextSibling', $selector, true) - ); - } - /** - * Use prev() and next(). - * - * @deprecated - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @access private - */ - public function _prev($selector = null) { - return $this->prev($selector); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function prev($selector = null) { - return $this->newInstance( - $this->getElementSiblings('previousSibling', $selector, true) - ); - } - /** - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo - */ - public function prevAll($selector = null) { - return $this->newInstance( - $this->getElementSiblings('previousSibling', $selector) - ); - } - /** - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo FIXME: returns source elements insted of next siblings - */ - public function nextAll($selector = null) { - return $this->newInstance( - $this->getElementSiblings('nextSibling', $selector) - ); - } - /** - * @access private - */ - protected function getElementSiblings($direction, $selector = null, $limitToOne = false) { - $stack = array(); - $count = 0; - foreach($this->stack() as $node) { - $test = $node; - while( isset($test->{$direction}) && $test->{$direction}) { - $test = $test->{$direction}; - if (! $test instanceof DOMELEMENT) - continue; - $stack[] = $test; - if ($limitToOne) - break; - } - } - if ($selector) { - $stackOld = $this->elements; - $this->elements = $stack; - $stack = $this->filter($selector, true)->stack(); - $this->elements = $stackOld; - } - return $stack; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function siblings($selector = null) { - $stack = array(); - $siblings = array_merge( - $this->getElementSiblings('previousSibling', $selector), - $this->getElementSiblings('nextSibling', $selector) - ); - foreach($siblings as $node) { - if (! $this->elementsContainsNode($node, $stack)) - $stack[] = $node; - } - return $this->newInstance($stack); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function not($selector = null) { - if (is_string($selector)) - phpQuery::debug(array('not', $selector)); - else - phpQuery::debug('not'); - $stack = array(); - if ($selector instanceof self || $selector instanceof DOMNODE) { - foreach($this->stack() as $node) { - if ($selector instanceof self) { - $matchFound = false; - foreach($selector->stack() as $notNode) { - if ($notNode->isSameNode($node)) - $matchFound = true; - } - if (! $matchFound) - $stack[] = $node; - } else if ($selector instanceof DOMNODE) { - if (! $selector->isSameNode($node)) - $stack[] = $node; - } else { - if (! $this->is($selector)) - $stack[] = $node; - } - } - } else { - $orgStack = $this->stack(); - $matched = $this->filter($selector, true)->stack(); -// $matched = array(); -// // simulate OR in filter() instead of AND 5y -// foreach($this->parseSelector($selector) as $s) { -// $matched = array_merge($matched, -// $this->filter(array($s))->stack() -// ); -// } - foreach($orgStack as $node) - if (! $this->elementsContainsNode($node, $matched)) - $stack[] = $node; - } - return $this->newInstance($stack); - } - /** - * Enter description here... - * - * @param string|phpQueryObject - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function add($selector = null) { - if (! $selector) - return $this; - $stack = array(); - $this->elementsBackup = $this->elements; - $found = phpQuery::pq($selector, $this->getDocumentID()); - $this->merge($found->elements); - return $this->newInstance(); - } - /** - * @access private - */ - protected function merge() { - foreach(func_get_args() as $nodes) - foreach($nodes as $newNode ) - if (! $this->elementsContainsNode($newNode) ) - $this->elements[] = $newNode; - } - /** - * @access private - * TODO refactor to stackContainsNode - */ - protected function elementsContainsNode($nodeToCheck, $elementsStack = null) { - $loop = ! is_null($elementsStack) - ? $elementsStack - : $this->elements; - foreach($loop as $node) { - if ( $node->isSameNode( $nodeToCheck ) ) - return true; - } - return false; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function parent($selector = null) { - $stack = array(); - foreach($this->elements as $node ) - if ( $node->parentNode && ! $this->elementsContainsNode($node->parentNode, $stack) ) - $stack[] = $node->parentNode; - $this->elementsBackup = $this->elements; - $this->elements = $stack; - if ( $selector ) - $this->filter($selector, true); - return $this->newInstance(); - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function parents($selector = null) { - $stack = array(); - if (! $this->elements ) - $this->debug('parents() - stack empty'); - foreach($this->elements as $node) { - $test = $node; - while( $test->parentNode) { - $test = $test->parentNode; - if ($this->isRoot($test)) - break; - if (! $this->elementsContainsNode($test, $stack)) { - $stack[] = $test; - continue; - } - } - } - $this->elementsBackup = $this->elements; - $this->elements = $stack; - if ( $selector ) - $this->filter($selector, true); - return $this->newInstance(); - } - /** - * Internal stack iterator. - * - * @access private - */ - public function stack($nodeTypes = null) { - if (!isset($nodeTypes)) - return $this->elements; - if (!is_array($nodeTypes)) - $nodeTypes = array($nodeTypes); - $return = array(); - foreach($this->elements as $node) { - if (in_array($node->nodeType, $nodeTypes)) - $return[] = $node; - } - return $return; - } - // TODO phpdoc; $oldAttr is result of hasAttribute, before any changes - protected function attrEvents($attr, $oldAttr, $oldValue, $node) { - // skip events for XML documents - if (! $this->isXHTML() && ! $this->isHTML()) - return; - $event = null; - // identify - $isInputValue = $node->tagName == 'input' - && ( - in_array($node->getAttribute('type'), - array('text', 'password', 'hidden')) - || !$node->getAttribute('type') - ); - $isRadio = $node->tagName == 'input' - && $node->getAttribute('type') == 'radio'; - $isCheckbox = $node->tagName == 'input' - && $node->getAttribute('type') == 'checkbox'; - $isOption = $node->tagName == 'option'; - if ($isInputValue && $attr == 'value' && $oldValue != $node->getAttribute($attr)) { - $event = new DOMEvent(array( - 'target' => $node, - 'type' => 'change' - )); - } else if (($isRadio || $isCheckbox) && $attr == 'checked' && ( - // check - (! $oldAttr && $node->hasAttribute($attr)) - // un-check - || (! $node->hasAttribute($attr) && $oldAttr) - )) { - $event = new DOMEvent(array( - 'target' => $node, - 'type' => 'change' - )); - } else if ($isOption && $node->parentNode && $attr == 'selected' && ( - // select - (! $oldAttr && $node->hasAttribute($attr)) - // un-select - || (! $node->hasAttribute($attr) && $oldAttr) - )) { - $event = new DOMEvent(array( - 'target' => $node->parentNode, - 'type' => 'change' - )); - } - if ($event) { - phpQueryEvents::trigger($this->getDocumentID(), - $event->type, array($event), $node - ); - } - } - public function attr($attr = null, $value = null) { - foreach($this->stack(1) as $node) { - if (! is_null($value)) { - $loop = $attr == '*' - ? $this->getNodeAttrs($node) - : array($attr); - foreach($loop as $a) { - $oldValue = $node->getAttribute($a); - $oldAttr = $node->hasAttribute($a); - // TODO raises an error when charset other than UTF-8 - // while document's charset is also not UTF-8 - @$node->setAttribute($a, $value); - $this->attrEvents($a, $oldAttr, $oldValue, $node); - } - } else if ($attr == '*') { - // jQuery difference - $return = array(); - foreach($node->attributes as $n => $v) - $return[$n] = $v->value; - return $return; - } else - return $node->hasAttribute($attr) - ? $node->getAttribute($attr) - : null; - } - return is_null($value) - ? '' : $this; - } - /** - * @access private - */ - protected function getNodeAttrs($node) { - $return = array(); - foreach($node->attributes as $n => $o) - $return[] = $n; - return $return; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo check CDATA ??? - */ - public function attrPHP($attr, $code) { - if (! is_null($code)) { - $value = '<'.'?php '.$code.' ?'.'>'; - // TODO tempolary solution - // http://code.google.com/p/phpquery/issues/detail?id=17 -// if (function_exists('mb_detect_encoding') && mb_detect_encoding($value) == 'ASCII') -// $value = mb_convert_encoding($value, 'UTF-8', 'HTML-ENTITIES'); - } - foreach($this->stack(1) as $node) { - if (! is_null($code)) { -// $attrNode = $this->DOM->createAttribute($attr); - $node->setAttribute($attr, $value); -// $attrNode->value = $value; -// $node->appendChild($attrNode); - } else if ( $attr == '*') { - // jQuery diff - $return = array(); - foreach($node->attributes as $n => $v) - $return[$n] = $v->value; - return $return; - } else - return $node->getAttribute($attr); - } - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function removeAttr($attr) { - foreach($this->stack(1) as $node) { - $loop = $attr == '*' - ? $this->getNodeAttrs($node) - : array($attr); - foreach($loop as $a) { - $oldValue = $node->getAttribute($a); - $node->removeAttribute($a); - $this->attrEvents($a, $oldValue, null, $node); - } - } - return $this; - } - /** - * Return form element value. - * - * @return String Fields value. - */ - public function val($val = null) { - if (! isset($val)) { - if ($this->eq(0)->is('select')) { - $selected = $this->eq(0)->find('option[selected=selected]'); - if ($selected->is('[value]')) - return $selected->attr('value'); - else - return $selected->text(); - } else if ($this->eq(0)->is('textarea')) - return $this->eq(0)->markup(); - else - return $this->eq(0)->attr('value'); - } else { - $_val = null; - foreach($this->stack(1) as $node) { - $node = pq($node, $this->getDocumentID()); - if (is_array($val) && in_array($node->attr('type'), array('checkbox', 'radio'))) { - $isChecked = in_array($node->attr('value'), $val) - || in_array($node->attr('name'), $val); - if ($isChecked) - $node->attr('checked', 'checked'); - else - $node->removeAttr('checked'); - } else if ($node->get(0)->tagName == 'select') { - if (! isset($_val)) { - $_val = array(); - if (! is_array($val)) - $_val = array((string)$val); - else - foreach($val as $v) - $_val[] = $v; - } - foreach($node['option']->stack(1) as $option) { - $option = pq($option, $this->getDocumentID()); - $selected = false; - // XXX: workaround for string comparsion, see issue #96 - // http://code.google.com/p/phpquery/issues/detail?id=96 - $selected = is_null($option->attr('value')) - ? in_array($option->markup(), $_val) - : in_array($option->attr('value'), $_val); -// $optionValue = $option->attr('value'); -// $optionText = $option->text(); -// $optionTextLenght = mb_strlen($optionText); -// foreach($_val as $v) -// if ($optionValue == $v) -// $selected = true; -// else if ($optionText == $v && $optionTextLenght == mb_strlen($v)) -// $selected = true; - if ($selected) - $option->attr('selected', 'selected'); - else - $option->removeAttr('selected'); - } - } else if ($node->get(0)->tagName == 'textarea') - $node->markup($val); - else - $node->attr('value', $val); - } - } - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function andSelf() { - if ( $this->previous ) - $this->elements = array_merge($this->elements, $this->previous->elements); - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function addClass( $className) { - if (! $className) - return $this; - foreach($this->stack(1) as $node) { - if (! $this->is(".$className", $node)) - $node->setAttribute( - 'class', - trim($node->getAttribute('class').' '.$className) - ); - } - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function addClassPHP( $className) { - foreach($this->stack(1) as $node) { - $classes = $node->getAttribute('class'); - $newValue = $classes - ? $classes.' <'.'?php '.$className.' ?'.'>' - : '<'.'?php '.$className.' ?'.'>'; - $node->setAttribute('class', $newValue); - } - return $this; - } - /** - * Enter description here... - * - * @param string $className - * @return bool - */ - public function hasClass($className) { - foreach($this->stack(1) as $node) { - if ( $this->is(".$className", $node)) - return true; - } - return false; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function removeClass($className) { - foreach($this->stack(1) as $node) { - $classes = explode( ' ', $node->getAttribute('class')); - if ( in_array($className, $classes)) { - $classes = array_diff($classes, array($className)); - if ( $classes ) - $node->setAttribute('class', implode(' ', $classes)); - else - $node->removeAttribute('class'); - } - } - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function toggleClass($className) { - foreach($this->stack(1) as $node) { - if ( $this->is( $node, '.'.$className )) - $this->removeClass($className); - else - $this->addClass($className); - } - return $this; - } - /** - * Proper name without underscore (just ->empty()) also works. - * - * Removes all child nodes from the set of matched elements. - * - * Example: - * pq("p")._empty() - * - * HTML: - *

    Hello, Person and person

    - * - * Result: - * [

    ] - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @access private - */ - public function _empty() { - foreach($this->stack(1) as $node) { - // thx to 'dave at dgx dot cz' - $node->nodeValue = ''; - } - return $this; - } - /** - * Enter description here... - * - * @param array|string $callback Expects $node as first param, $index as second - * @param array $scope External variables passed to callback. Use compact('varName1', 'varName2'...) and extract($scope) - * @param array $arg1 Will ba passed as third and futher args to callback. - * @param array $arg2 Will ba passed as fourth and futher args to callback, and so on... - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function each($callback, $param1 = null, $param2 = null, $param3 = null) { - $paramStructure = null; - if (func_num_args() > 1) { - $paramStructure = func_get_args(); - $paramStructure = array_slice($paramStructure, 1); - } - foreach($this->elements as $v) - phpQuery::callbackRun($callback, array($v), $paramStructure); - return $this; - } - /** - * Run callback on actual object. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function callback($callback, $param1 = null, $param2 = null, $param3 = null) { - $params = func_get_args(); - $params[0] = $this; - phpQuery::callbackRun($callback, $params); - return $this; - } - /** - * Enter description here... - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @todo add $scope and $args as in each() ??? - */ - public function map($callback, $param1 = null, $param2 = null, $param3 = null) { -// $stack = array(); -//// foreach($this->newInstance() as $node) { -// foreach($this->newInstance() as $node) { -// $result = call_user_func($callback, $node); -// if ($result) -// $stack[] = $result; -// } - $params = func_get_args(); - array_unshift($params, $this->elements); - return $this->newInstance( - call_user_func_array(array('phpQuery', 'map'), $params) -// phpQuery::map($this->elements, $callback) - ); - } - /** - * Enter description here... - * - * @param $key - * @param $value - */ - public function data($key, $value = null) { - if (! isset($value)) { - // TODO? implement specific jQuery behavior od returning parent values - // is child which we look up doesn't exist - return phpQuery::data($this->get(0), $key, $value, $this->getDocumentID()); - } else { - foreach($this as $node) - phpQuery::data($node, $key, $value, $this->getDocumentID()); - return $this; - } - } - /** - * Enter description here... - * - * @param $key - */ - public function removeData($key) { - foreach($this as $node) - phpQuery::removeData($node, $key, $this->getDocumentID()); - return $this; - } - // INTERFACE IMPLEMENTATIONS - - // ITERATOR INTERFACE - /** - * @access private - */ - public function rewind(){ - $this->debug('iterating foreach'); -// phpQuery::selectDocument($this->getDocumentID()); - $this->elementsBackup = $this->elements; - $this->elementsInterator = $this->elements; - $this->valid = isset( $this->elements[0] ) - ? 1 : 0; -// $this->elements = $this->valid -// ? array($this->elements[0]) -// : array(); - $this->current = 0; - } - /** - * @access private - */ - public function current(){ - return $this->elementsInterator[ $this->current ]; - } - /** - * @access private - */ - public function key(){ - return $this->current; - } - /** - * Double-function method. - * - * First: main iterator interface method. - * Second: Returning next sibling, alias for _next(). - * - * Proper functionality is choosed automagicaly. - * - * @see phpQueryObject::_next() - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public function next($cssSelector = null){ -// if ($cssSelector || $this->valid) -// return $this->_next($cssSelector); - $this->valid = isset( $this->elementsInterator[ $this->current+1 ] ) - ? true - : false; - if (! $this->valid && $this->elementsInterator) { - $this->elementsInterator = null; - } else if ($this->valid) { - $this->current++; - } else { - return $this->_next($cssSelector); - } - } - /** - * @access private - */ - public function valid(){ - return $this->valid; - } - // ITERATOR INTERFACE END - // ARRAYACCESS INTERFACE - /** - * @access private - */ - public function offsetExists($offset) { - return $this->find($offset)->size() > 0; - } - /** - * @access private - */ - public function offsetGet($offset) { - return $this->find($offset); - } - /** - * @access private - */ - public function offsetSet($offset, $value) { -// $this->find($offset)->replaceWith($value); - $this->find($offset)->html($value); - } - /** - * @access private - */ - public function offsetUnset($offset) { - // empty - throw new Exception("Can't do unset, use array interface only for calling queries and replacing HTML."); - } - // ARRAYACCESS INTERFACE END - /** - * Returns node's XPath. - * - * @param unknown_type $oneNode - * @return string - * @TODO use native getNodePath is avaible - * @access private - */ - protected function getNodeXpath($oneNode = null, $namespace = null) { - $return = array(); - $loop = $oneNode - ? array($oneNode) - : $this->elements; -// if ($namespace) -// $namespace .= ':'; - foreach($loop as $node) { - if ($node instanceof DOMDOCUMENT) { - $return[] = ''; - continue; - } - $xpath = array(); - while(! ($node instanceof DOMDOCUMENT)) { - $i = 1; - $sibling = $node; - while($sibling->previousSibling) { - $sibling = $sibling->previousSibling; - $isElement = $sibling instanceof DOMELEMENT; - if ($isElement && $sibling->tagName == $node->tagName) - $i++; - } - $xpath[] = $this->isXML() - ? "*[local-name()='{$node->tagName}'][{$i}]" - : "{$node->tagName}[{$i}]"; - $node = $node->parentNode; - } - $xpath = join('/', array_reverse($xpath)); - $return[] = '/'.$xpath; - } - return $oneNode - ? $return[0] - : $return; - } - // HELPERS - public function whois($oneNode = null) { - $return = array(); - $loop = $oneNode - ? array( $oneNode ) - : $this->elements; - foreach($loop as $node) { - if (isset($node->tagName)) { - $tag = in_array($node->tagName, array('php', 'js')) - ? strtoupper($node->tagName) - : $node->tagName; - $return[] = $tag - .($node->getAttribute('id') - ? '#'.$node->getAttribute('id'):'') - .($node->getAttribute('class') - ? '.'.join('.', split(' ', $node->getAttribute('class'))):'') - .($node->getAttribute('name') - ? '[name="'.$node->getAttribute('name').'"]':'') - .($node->getAttribute('value') && strpos($node->getAttribute('value'), '<'.'?php') === false - ? '[value="'.substr(str_replace("\n", '', $node->getAttribute('value')), 0, 15).'"]':'') - .($node->getAttribute('value') && strpos($node->getAttribute('value'), '<'.'?php') !== false - ? '[value=PHP]':'') - .($node->getAttribute('selected') - ? '[selected]':'') - .($node->getAttribute('checked') - ? '[checked]':'') - ; - } else if ($node instanceof DOMTEXT) { - if (trim($node->textContent)) - $return[] = 'Text:'.substr(str_replace("\n", ' ', $node->textContent), 0, 15); - } else { - - } - } - return $oneNode && isset($return[0]) - ? $return[0] - : $return; - } - /** - * Dump htmlOuter and preserve chain. Usefull for debugging. - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * - */ - public function dump() { - print 'DUMP #'.(phpQuery::$dumpCount++).' '; - $debug = phpQuery::$debug; - phpQuery::$debug = false; -// print __FILE__.':'.__LINE__."\n"; - var_dump($this->htmlOuter()); - return $this; - } - public function dumpWhois() { - print 'DUMP #'.(phpQuery::$dumpCount++).' '; - $debug = phpQuery::$debug; - phpQuery::$debug = false; -// print __FILE__.':'.__LINE__."\n"; - var_dump('whois', $this->whois()); - phpQuery::$debug = $debug; - return $this; - } - public function dumpLength() { - print 'DUMP #'.(phpQuery::$dumpCount++).' '; - $debug = phpQuery::$debug; - phpQuery::$debug = false; -// print __FILE__.':'.__LINE__."\n"; - var_dump('length', $this->length()); - phpQuery::$debug = $debug; - return $this; - } - public function dumpTree($html = true, $title = true) { - $output = $title - ? 'DUMP #'.(phpQuery::$dumpCount++)." \n" : ''; - $debug = phpQuery::$debug; - phpQuery::$debug = false; - foreach($this->stack() as $node) - $output .= $this->__dumpTree($node); - phpQuery::$debug = $debug; - print $html - ? nl2br(str_replace(' ', ' ', $output)) - : $output; - return $this; - } - private function __dumpTree($node, $intend = 0) { - $whois = $this->whois($node); - $return = ''; - if ($whois) - $return .= str_repeat(' - ', $intend).$whois."\n"; - if (isset($node->childNodes)) - foreach($node->childNodes as $chNode) - $return .= $this->__dumpTree($chNode, $intend+1); - return $return; - } - /** - * Dump htmlOuter and stop script execution. Usefull for debugging. - * - */ - public function dumpDie() { - print __FILE__.':'.__LINE__; - var_dump($this->htmlOuter()); - die(); - } -} - - -// -- Multibyte Compatibility functions --------------------------------------- -// http://svn.iphonewebdev.com/lace/lib/mb_compat.php - -/** - * mb_internal_encoding() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_internal_encoding')) -{ - function mb_internal_encoding($enc) {return true; } -} - -/** - * mb_regex_encoding() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_regex_encoding')) -{ - function mb_regex_encoding($enc) {return true; } -} - -/** - * mb_strlen() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_strlen')) -{ - function mb_strlen($str) - { - return strlen($str); - } -} - -/** - * mb_strpos() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_strpos')) -{ - function mb_strpos($haystack, $needle, $offset=0) - { - return strpos($haystack, $needle, $offset); - } -} -/** - * mb_stripos() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_stripos')) -{ - function mb_stripos($haystack, $needle, $offset=0) - { - return stripos($haystack, $needle, $offset); - } -} - -/** - * mb_substr() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_substr')) -{ - function mb_substr($str, $start, $length=0) - { - return substr($str, $start, $length); - } -} - -/** - * mb_substr_count() - * - * Included for mbstring pseudo-compatability. - */ -if (!function_exists('mb_substr_count')) -{ - function mb_substr_count($haystack, $needle) - { - return substr_count($haystack, $needle); - } -} - - -/** - * Static namespace for phpQuery functions. - * - * @author Tobiasz Cudnik - * @package phpQuery - */ -abstract class phpQuery { - /** - * XXX: Workaround for mbstring problems - * - * @var bool - */ - public static $mbstringSupport = true; - public static $debug = false; - public static $documents = array(); - public static $defaultDocumentID = null; -// public static $defaultDoctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'; - /** - * Applies only to HTML. - * - * @var unknown_type - */ - public static $defaultDoctype = ''; - public static $defaultCharset = 'UTF-8'; - /** - * Static namespace for plugins. - * - * @var object - */ - public static $plugins = array(); - /** - * List of loaded plugins. - * - * @var unknown_type - */ - public static $pluginsLoaded = array(); - public static $pluginsMethods = array(); - public static $pluginsStaticMethods = array(); - public static $extendMethods = array(); - /** - * @TODO implement - */ - public static $extendStaticMethods = array(); - /** - * Hosts allowed for AJAX connections. - * Dot '.' means $_SERVER['HTTP_HOST'] (if any). - * - * @var array - */ - public static $ajaxAllowedHosts = array( - '.' - ); - /** - * AJAX settings. - * - * @var array - * XXX should it be static or not ? - */ - public static $ajaxSettings = array( - 'url' => '',//TODO - 'global' => true, - 'type' => "GET", - 'timeout' => null, - 'contentType' => "application/x-www-form-urlencoded", - 'processData' => true, -// 'async' => true, - 'data' => null, - 'username' => null, - 'password' => null, - 'accepts' => array( - 'xml' => "application/xml, text/xml", - 'html' => "text/html", - 'script' => "text/javascript, application/javascript", - 'json' => "application/json, text/javascript", - 'text' => "text/plain", - '_default' => "*/*" - ) - ); - public static $lastModified = null; - public static $active = 0; - public static $dumpCount = 0; - /** - * Multi-purpose function. - * Use pq() as shortcut. - * - * In below examples, $pq is any result of pq(); function. - * - * 1. Import markup into existing document (without any attaching): - * - Import into selected document: - * pq('
    ') // DOESNT accept text nodes at beginning of input string ! - * - Import into document with ID from $pq->getDocumentID(): - * pq('
    ', $pq->getDocumentID()) - * - Import into same document as DOMNode belongs to: - * pq('
    ', DOMNode) - * - Import into document from phpQuery object: - * pq('
    ', $pq) - * - * 2. Run query: - * - Run query on last selected document: - * pq('div.myClass') - * - Run query on document with ID from $pq->getDocumentID(): - * pq('div.myClass', $pq->getDocumentID()) - * - Run query on same document as DOMNode belongs to and use node(s)as root for query: - * pq('div.myClass', DOMNode) - * - Run query on document from phpQuery object - * and use object's stack as root node(s) for query: - * pq('div.myClass', $pq) - * - * @param string|DOMNode|DOMNodeList|array $arg1 HTML markup, CSS Selector, DOMNode or array of DOMNodes - * @param string|phpQueryObject|DOMNode $context DOM ID from $pq->getDocumentID(), phpQuery object (determines also query root) or DOMNode (determines also query root) - * - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery|QueryTemplatesPhpQuery|false - * phpQuery object or false in case of error. - */ - public static function pq($arg1, $context = null) { - if ($arg1 instanceof DOMNODE && ! isset($context)) { - foreach(phpQuery::$documents as $documentWrapper) { - $compare = $arg1 instanceof DOMDocument - ? $arg1 : $arg1->ownerDocument; - if ($documentWrapper->document->isSameNode($compare)) - $context = $documentWrapper->id; - } - } - if (! $context) { - $domId = self::$defaultDocumentID; - if (! $domId) - throw new Exception("Can't use last created DOM, because there isn't any. Use phpQuery::newDocument() first."); -// } else if (is_object($context) && ($context instanceof PHPQUERY || is_subclass_of($context, 'phpQueryObject'))) - } else if (is_object($context) && $context instanceof phpQueryObject) - $domId = $context->getDocumentID(); - else if ($context instanceof DOMDOCUMENT) { - $domId = self::getDocumentID($context); - if (! $domId) { - //throw new Exception('Orphaned DOMDocument'); - $domId = self::newDocument($context)->getDocumentID(); - } - } else if ($context instanceof DOMNODE) { - $domId = self::getDocumentID($context); - if (! $domId) { - throw new Exception('Orphaned DOMNode'); -// $domId = self::newDocument($context->ownerDocument); - } - } else - $domId = $context; - if ($arg1 instanceof phpQueryObject) { -// if (is_object($arg1) && (get_class($arg1) == 'phpQueryObject' || $arg1 instanceof PHPQUERY || is_subclass_of($arg1, 'phpQueryObject'))) { - /** - * Return $arg1 or import $arg1 stack if document differs: - * pq(pq('
    ')) - */ - if ($arg1->getDocumentID() == $domId) - return $arg1; - $class = get_class($arg1); - // support inheritance by passing old object to overloaded constructor - $phpQuery = $class != 'phpQuery' - ? new $class($arg1, $domId) - : new phpQueryObject($domId); - $phpQuery->elements = array(); - foreach($arg1->elements as $node) - $phpQuery->elements[] = $phpQuery->document->importNode($node, true); - return $phpQuery; - } else if ($arg1 instanceof DOMNODE || (is_array($arg1) && isset($arg1[0]) && $arg1[0] instanceof DOMNODE)) { - /* - * Wrap DOM nodes with phpQuery object, import into document when needed: - * pq(array($domNode1, $domNode2)) - */ - $phpQuery = new phpQueryObject($domId); - if (!($arg1 instanceof DOMNODELIST) && ! is_array($arg1)) - $arg1 = array($arg1); - $phpQuery->elements = array(); - foreach($arg1 as $node) { - $sameDocument = $node->ownerDocument instanceof DOMDOCUMENT - && ! $node->ownerDocument->isSameNode($phpQuery->document); - $phpQuery->elements[] = $sameDocument - ? $phpQuery->document->importNode($node, true) - : $node; - } - return $phpQuery; - } else if (self::isMarkup($arg1)) { - /** - * Import HTML: - * pq('
    ') - */ - $phpQuery = new phpQueryObject($domId); - return $phpQuery->newInstance( - $phpQuery->documentWrapper->import($arg1) - ); - } else { - /** - * Run CSS query: - * pq('div.myClass') - */ - $phpQuery = new phpQueryObject($domId); -// if ($context && ($context instanceof PHPQUERY || is_subclass_of($context, 'phpQueryObject'))) - if ($context && $context instanceof phpQueryObject) - $phpQuery->elements = $context->elements; - else if ($context && $context instanceof DOMNODELIST) { - $phpQuery->elements = array(); - foreach($context as $node) - $phpQuery->elements[] = $node; - } else if ($context && $context instanceof DOMNODE) - $phpQuery->elements = array($context); - return $phpQuery->find($arg1); - } - } - /** - * Sets default document to $id. Document has to be loaded prior - * to using this method. - * $id can be retrived via getDocumentID() or getDocumentIDRef(). - * - * @param unknown_type $id - */ - public static function selectDocument($id) { - $id = self::getDocumentID($id); - self::debug("Selecting document '$id' as default one"); - self::$defaultDocumentID = self::getDocumentID($id); - } - /** - * Returns document with id $id or last used as phpQueryObject. - * $id can be retrived via getDocumentID() or getDocumentIDRef(). - * Chainable. - * - * @see phpQuery::selectDocument() - * @param unknown_type $id - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function getDocument($id = null) { - if ($id) - phpQuery::selectDocument($id); - else - $id = phpQuery::$defaultDocumentID; - return new phpQueryObject($id); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocument($markup = null, $contentType = null) { - if (! $markup) - $markup = ''; - $documentID = phpQuery::createDocumentWrapper($markup, $contentType); - return new phpQueryObject($documentID); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentHTML($markup = null, $charset = null) { - $contentType = $charset - ? ";charset=$charset" - : ''; - return self::newDocument($markup, "text/html{$contentType}"); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentXML($markup = null, $charset = null) { - $contentType = $charset - ? ";charset=$charset" - : ''; - return self::newDocument($markup, "text/xml{$contentType}"); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentXHTML($markup = null, $charset = null) { - $contentType = $charset - ? ";charset=$charset" - : ''; - return self::newDocument($markup, "application/xhtml+xml{$contentType}"); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentPHP($markup = null, $contentType = "text/html") { - // TODO pass charset to phpToMarkup if possible (use DOMDocumentWrapper function) - $markup = phpQuery::phpToMarkup($markup, self::$defaultCharset); - return self::newDocument($markup, $contentType); - } - public static function phpToMarkup($php, $charset = 'utf-8') { - $regexes = array( - '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(\')([^\']*)<'.'?php?(.*?)(?:\\?>)([^\']*)\'@s', - '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(")([^"]*)<'.'?php?(.*?)(?:\\?>)([^"]*)"@s', - ); - foreach($regexes as $regex) - while (preg_match($regex, $php, $matches)) { - $php = preg_replace_callback( - $regex, -// create_function('$m, $charset = "'.$charset.'"', -// 'return $m[1].$m[2] -// .htmlspecialchars("<"."?php".$m[4]."?".">", ENT_QUOTES|ENT_NOQUOTES, $charset) -// .$m[5].$m[2];' -// ), - array('phpQuery', '_phpToMarkupCallback'), - $php - ); - } - $regex = '@(^|>[^<]*)+?(<\?php(.*?)(\?>))@s'; -//preg_match_all($regex, $php, $matches); -//var_dump($matches); - $php = preg_replace($regex, '\\1', $php); - return $php; - } - public static function _phpToMarkupCallback($php, $charset = 'utf-8') { - return $m[1].$m[2] - .htmlspecialchars("<"."?php".$m[4]."?".">", ENT_QUOTES|ENT_NOQUOTES, $charset) - .$m[5].$m[2]; - } - public static function _markupToPHPCallback($m) { - return "<"."?php ".htmlspecialchars_decode($m[1])." ?".">"; - } - /** - * Converts document markup containing PHP code generated by phpQuery::php() - * into valid (executable) PHP code syntax. - * - * @param string|phpQueryObject $content - * @return string PHP code. - */ - public static function markupToPHP($content) { - if ($content instanceof phpQueryObject) - $content = $content->markupOuter(); - /* ... to */ - $content = preg_replace_callback( - '@\s*\s*@s', -// create_function('$m', -// 'return "<'.'?php ".htmlspecialchars_decode($m[1])." ?'.'>";' -// ), - array('phpQuery', '_markupToPHPCallback'), - $content - ); - /* extra space added to save highlighters */ - $regexes = array( - '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(\')([^\']*)(?:<|%3C)\\?(?:php)?(.*?)(?:\\?(?:>|%3E))([^\']*)\'@s', - '@(<(?!\\?)(?:[^>]|\\?>)+\\w+\\s*=\\s*)(")([^"]*)(?:<|%3C)\\?(?:php)?(.*?)(?:\\?(?:>|%3E))([^"]*)"@s', - ); - foreach($regexes as $regex) - while (preg_match($regex, $content)) - $content = preg_replace_callback( - $regex, - create_function('$m', - 'return $m[1].$m[2].$m[3]."", " ", "\n", " ", "{", "$", "}", \'"\', "[", "]"), - htmlspecialchars_decode($m[4]) - ) - ." ?>".$m[5].$m[2];' - ), - $content - ); - return $content; - } - - - public static function loadDocumentHTML($html) - { - self::newDocumentFile($html, null, true); - } - - /** - * Creates new document from file $file. - * Chainable. - * - * @param string $file URLs allowed. See File wrapper page at php.net for more supported sources. - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentFile($file, $contentType = null, $is_html = false) { - if ($is_html) - { - $documentID = self::createDocumentWrapper( - $file, $contentType - ); - } - else - { - $documentID = self::createDocumentWrapper( - file_get_contents($file), $contentType - ); - } - return new phpQueryObject($documentID); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentFileHTML($file, $charset = null) { - $contentType = $charset - ? ";charset=$charset" - : ''; - return self::newDocumentFile($file, "text/html{$contentType}"); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentFileXML($file, $charset = null) { - $contentType = $charset - ? ";charset=$charset" - : ''; - return self::newDocumentFile($file, "text/xml{$contentType}"); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentFileXHTML($file, $charset = null) { - $contentType = $charset - ? ";charset=$charset" - : ''; - return self::newDocumentFile($file, "application/xhtml+xml{$contentType}"); - } - /** - * Creates new document from markup. - * Chainable. - * - * @param unknown_type $markup - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - */ - public static function newDocumentFilePHP($file, $contentType = null) { - return self::newDocumentPHP(file_get_contents($file), $contentType); - } - /** - * Reuses existing DOMDocument object. - * Chainable. - * - * @param $document DOMDocument - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @TODO support DOMDocument - */ - public static function loadDocument($document) { - // TODO - die('TODO loadDocument'); - } - /** - * Enter description here... - * - * @param unknown_type $html - * @param unknown_type $domId - * @return unknown New DOM ID - * @todo support PHP tags in input - * @todo support passing DOMDocument object from self::loadDocument - */ - protected static function createDocumentWrapper($html, $contentType = null, $documentID = null) { - if (function_exists('domxml_open_mem')) - throw new Exception("Old PHP4 DOM XML extension detected. phpQuery won't work until this extension is enabled."); -// $id = $documentID -// ? $documentID -// : md5(microtime()); - $document = null; - if ($html instanceof DOMDOCUMENT) { - if (self::getDocumentID($html)) { - // document already exists in phpQuery::$documents, make a copy - $document = clone $html; - } else { - // new document, add it to phpQuery::$documents - $wrapper = new DOMDocumentWrapper($html, $contentType, $documentID); - } - } else { - $wrapper = new DOMDocumentWrapper($html, $contentType, $documentID); - } -// $wrapper->id = $id; - // bind document - phpQuery::$documents[$wrapper->id] = $wrapper; - // remember last loaded document - phpQuery::selectDocument($wrapper->id); - return $wrapper->id; - } - /** - * Extend class namespace. - * - * @param string|array $target - * @param array $source - * @TODO support string $source - * @return unknown_type - */ - public static function extend($target, $source) { - switch($target) { - case 'phpQueryObject': - $targetRef = &self::$extendMethods; - $targetRef2 = &self::$pluginsMethods; - break; - case 'phpQuery': - $targetRef = &self::$extendStaticMethods; - $targetRef2 = &self::$pluginsStaticMethods; - break; - default: - throw new Exception("Unsupported \$target type"); - } - if (is_string($source)) - $source = array($source => $source); - foreach($source as $method => $callback) { - if (isset($targetRef[$method])) { -// throw new Exception - self::debug("Duplicate method '{$method}', can\'t extend '{$target}'"); - continue; - } - if (isset($targetRef2[$method])) { -// throw new Exception - self::debug("Duplicate method '{$method}' from plugin '{$targetRef2[$method]}'," - ." can\'t extend '{$target}'"); - continue; - } - $targetRef[$method] = $callback; - } - return true; - } - /** - * Extend phpQuery with $class from $file. - * - * @param string $class Extending class name. Real class name can be prepended phpQuery_. - * @param string $file Filename to include. Defaults to "{$class}.php". - */ - public static function plugin($class, $file = null) { - // TODO $class checked agains phpQuery_$class -// if (strpos($class, 'phpQuery') === 0) -// $class = substr($class, 8); - if (in_array($class, self::$pluginsLoaded)) - return true; - if (! $file) - $file = $class.'.php'; - $objectClassExists = class_exists('phpQueryObjectPlugin_'.$class); - $staticClassExists = class_exists('phpQueryPlugin_'.$class); - if (! $objectClassExists && ! $staticClassExists) - require_once($file); - self::$pluginsLoaded[] = $class; - // static methods - if (class_exists('phpQueryPlugin_'.$class)) { - $realClass = 'phpQueryPlugin_'.$class; - $vars = get_class_vars($realClass); - $loop = isset($vars['phpQueryMethods']) - && ! is_null($vars['phpQueryMethods']) - ? $vars['phpQueryMethods'] - : get_class_methods($realClass); - foreach($loop as $method) { - if ($method == '__initialize') - continue; - if (! is_callable(array($realClass, $method))) - continue; - if (isset(self::$pluginsStaticMethods[$method])) { - throw new Exception("Duplicate method '{$method}' from plugin '{$c}' conflicts with same method from plugin '".self::$pluginsStaticMethods[$method]."'"); - return; - } - self::$pluginsStaticMethods[$method] = $class; - } - if (method_exists($realClass, '__initialize')) - call_user_func_array(array($realClass, '__initialize'), array()); - } - // object methods - if (class_exists('phpQueryObjectPlugin_'.$class)) { - $realClass = 'phpQueryObjectPlugin_'.$class; - $vars = get_class_vars($realClass); - $loop = isset($vars['phpQueryMethods']) - && ! is_null($vars['phpQueryMethods']) - ? $vars['phpQueryMethods'] - : get_class_methods($realClass); - foreach($loop as $method) { - if (! is_callable(array($realClass, $method))) - continue; - if (isset(self::$pluginsMethods[$method])) { - throw new Exception("Duplicate method '{$method}' from plugin '{$c}' conflicts with same method from plugin '".self::$pluginsMethods[$method]."'"); - continue; - } - self::$pluginsMethods[$method] = $class; - } - } - return true; - } - /** - * Unloades all or specified document from memory. - * - * @param mixed $documentID @see phpQuery::getDocumentID() for supported types. - */ - public static function unloadDocuments($id = null) { - if (isset($id)) { - if ($id = self::getDocumentID($id)) - unset(phpQuery::$documents[$id]); - } else { - foreach(phpQuery::$documents as $k => $v) { - unset(phpQuery::$documents[$k]); - } - } - } - /** - * Parses phpQuery object or HTML result against PHP tags and makes them active. - * - * @param phpQuery|string $content - * @deprecated - * @return string - */ - public static function unsafePHPTags($content) { - return self::markupToPHP($content); - } - public static function DOMNodeListToArray($DOMNodeList) { - $array = array(); - if (! $DOMNodeList) - return $array; - foreach($DOMNodeList as $node) - $array[] = $node; - return $array; - } - /** - * Checks if $input is HTML string, which has to start with '<'. - * - * @deprecated - * @param String $input - * @return Bool - * @todo still used ? - */ - public static function isMarkup($input) { - return ! is_array($input) && substr(trim($input), 0, 1) == '<'; - } - public static function debug($text) { - if (self::$debug) - print var_dump($text); - } - /** - * Make an AJAX request. - * - * @param array See $options http://docs.jquery.com/Ajax/jQuery.ajax#toptions - * Additional options are: - * 'document' - document for global events, @see phpQuery::getDocumentID() - * 'referer' - implemented - * 'requested_with' - TODO; not implemented (X-Requested-With) - * @return Zend_Http_Client - * @link http://docs.jquery.com/Ajax/jQuery.ajax - * - * @TODO $options['cache'] - * @TODO $options['processData'] - * @TODO $options['xhr'] - * @TODO $options['data'] as string - * @TODO XHR interface - */ - public static function ajax($options = array(), $xhr = null) { - $options = array_merge( - self::$ajaxSettings, $options - ); - $documentID = isset($options['document']) - ? self::getDocumentID($options['document']) - : null; - if ($xhr) { - // reuse existing XHR object, but clean it up - $client = $xhr; -// $client->setParameterPost(null); -// $client->setParameterGet(null); - $client->setAuth(false); - $client->setHeaders("If-Modified-Since", null); - $client->setHeaders("Referer", null); - $client->resetParameters(); - } else { - // create new XHR object - require_once('Zend/Http/Client.php'); - $client = new Zend_Http_Client(); - $client->setCookieJar(); - } - if (isset($options['timeout'])) - $client->setConfig(array( - 'timeout' => $options['timeout'], - )); -// 'maxredirects' => 0, - foreach(self::$ajaxAllowedHosts as $k => $host) - if ($host == '.' && isset($_SERVER['HTTP_HOST'])) - self::$ajaxAllowedHosts[$k] = $_SERVER['HTTP_HOST']; - $host = parse_url($options['url'], PHP_URL_HOST); - if (! in_array($host, self::$ajaxAllowedHosts)) { - throw new Exception("Request not permitted, host '$host' not present in " - ."phpQuery::\$ajaxAllowedHosts"); - } - // JSONP - $jsre = "/=\\?(&|$)/"; - if (isset($options['dataType']) && $options['dataType'] == 'jsonp') { - $jsonpCallbackParam = $options['jsonp'] - ? $options['jsonp'] : 'callback'; - if (strtolower($options['type']) == 'get') { - if (! preg_match($jsre, $options['url'])) { - $sep = strpos($options['url'], '?') - ? '&' : '?'; - $options['url'] .= "$sep$jsonpCallbackParam=?"; - } - } else if ($options['data']) { - $jsonp = false; - foreach($options['data'] as $n => $v) { - if ($v == '?') - $jsonp = true; - } - if (! $jsonp) { - $options['data'][$jsonpCallbackParam] = '?'; - } - } - $options['dataType'] = 'json'; - } - if (isset($options['dataType']) && $options['dataType'] == 'json') { - $jsonpCallback = 'json_'.md5(microtime()); - $jsonpData = $jsonpUrl = false; - if ($options['data']) { - foreach($options['data'] as $n => $v) { - if ($v == '?') - $jsonpData = $n; - } - } - if (preg_match($jsre, $options['url'])) - $jsonpUrl = true; - if ($jsonpData !== false || $jsonpUrl) { - // remember callback name for httpData() - $options['_jsonp'] = $jsonpCallback; - if ($jsonpData !== false) - $options['data'][$jsonpData] = $jsonpCallback; - if ($jsonpUrl) - $options['url'] = preg_replace($jsre, "=$jsonpCallback\\1", $options['url']); - } - } - $client->setUri($options['url']); - $client->setMethod(strtoupper($options['type'])); - if (isset($options['referer']) && $options['referer']) - $client->setHeaders('Referer', $options['referer']); - $client->setHeaders(array( -// 'content-type' => $options['contentType'], - 'User-Agent' => 'Mozilla/5.0 (X11; U; Linux x86; en-US; rv:1.9.0.5) Gecko' - .'/2008122010 Firefox/3.0.5', - // TODO custom charset - 'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', -// 'Connection' => 'keep-alive', -// 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language' => 'en-us,en;q=0.5', - )); - if ($options['username']) - $client->setAuth($options['username'], $options['password']); - if (isset($options['ifModified']) && $options['ifModified']) - $client->setHeaders("If-Modified-Since", - self::$lastModified - ? self::$lastModified - : "Thu, 01 Jan 1970 00:00:00 GMT" - ); - $client->setHeaders("Accept", - isset($options['dataType']) - && isset(self::$ajaxSettings['accepts'][ $options['dataType'] ]) - ? self::$ajaxSettings['accepts'][ $options['dataType'] ].", */*" - : self::$ajaxSettings['accepts']['_default'] - ); - // TODO $options['processData'] - if ($options['data'] instanceof phpQueryObject) { - $serialized = $options['data']->serializeArray($options['data']); - $options['data'] = array(); - foreach($serialized as $r) - $options['data'][ $r['name'] ] = $r['value']; - } - if (strtolower($options['type']) == 'get') { - $client->setParameterGet($options['data']); - } else if (strtolower($options['type']) == 'post') { - $client->setEncType($options['contentType']); - $client->setParameterPost($options['data']); - } - if (self::$active == 0 && $options['global']) - phpQueryEvents::trigger($documentID, 'ajaxStart'); - self::$active++; - // beforeSend callback - if (isset($options['beforeSend']) && $options['beforeSend']) - phpQuery::callbackRun($options['beforeSend'], array($client)); - // ajaxSend event - if ($options['global']) - phpQueryEvents::trigger($documentID, 'ajaxSend', array($client, $options)); - if (phpQuery::$debug) { - self::debug("{$options['type']}: {$options['url']}\n"); - self::debug("Options:
    ".var_export($options, true)."
    \n"); -// if ($client->getCookieJar()) -// self::debug("Cookies:
    ".var_export($client->getCookieJar()->getMatchingCookies($options['url']), true)."
    \n"); - } - // request - $response = $client->request(); - if (phpQuery::$debug) { - self::debug('Status: '.$response->getStatus().' / '.$response->getMessage()); - self::debug($client->getLastRequest()); - self::debug($response->getHeaders()); - } - if ($response->isSuccessful()) { - // XXX tempolary - self::$lastModified = $response->getHeader('Last-Modified'); - $data = self::httpData($response->getBody(), $options['dataType'], $options); - if (isset($options['success']) && $options['success']) - phpQuery::callbackRun($options['success'], array($data, $response->getStatus(), $options)); - if ($options['global']) - phpQueryEvents::trigger($documentID, 'ajaxSuccess', array($client, $options)); - } else { - if (isset($options['error']) && $options['error']) - phpQuery::callbackRun($options['error'], array($client, $response->getStatus(), $response->getMessage())); - if ($options['global']) - phpQueryEvents::trigger($documentID, 'ajaxError', array($client, /*$response->getStatus(),*/$response->getMessage(), $options)); - } - if (isset($options['complete']) && $options['complete']) - phpQuery::callbackRun($options['complete'], array($client, $response->getStatus())); - if ($options['global']) - phpQueryEvents::trigger($documentID, 'ajaxComplete', array($client, $options)); - if ($options['global'] && ! --self::$active) - phpQueryEvents::trigger($documentID, 'ajaxStop'); - return $client; -// if (is_null($domId)) -// $domId = self::$defaultDocumentID ? self::$defaultDocumentID : false; -// return new phpQueryAjaxResponse($response, $domId); - } - protected static function httpData($data, $type, $options) { - if (isset($options['dataFilter']) && $options['dataFilter']) - $data = self::callbackRun($options['dataFilter'], array($data, $type)); - if (is_string($data)) { - if ($type == "json") { - if (isset($options['_jsonp']) && $options['_jsonp']) { - $data = preg_replace('/^\s*\w+\((.*)\)\s*$/s', '$1', $data); - } - $data = self::parseJSON($data); - } - } - return $data; - } - /** - * Enter description here... - * - * @param array|phpQuery $data - * - */ - public static function param($data) { - return http_build_query($data, null, '&'); - } - public static function get($url, $data = null, $callback = null, $type = null) { - if (!is_array($data)) { - $callback = $data; - $data = null; - } - // TODO some array_values on this shit - return phpQuery::ajax(array( - 'type' => 'GET', - 'url' => $url, - 'data' => $data, - 'success' => $callback, - 'dataType' => $type, - )); - } - public static function post($url, $data = null, $callback = null, $type = null) { - if (!is_array($data)) { - $callback = $data; - $data = null; - } - return phpQuery::ajax(array( - 'type' => 'POST', - 'url' => $url, - 'data' => $data, - 'success' => $callback, - 'dataType' => $type, - )); - } - public static function getJSON($url, $data = null, $callback = null) { - if (!is_array($data)) { - $callback = $data; - $data = null; - } - // TODO some array_values on this shit - return phpQuery::ajax(array( - 'type' => 'GET', - 'url' => $url, - 'data' => $data, - 'success' => $callback, - 'dataType' => 'json', - )); - } - public static function ajaxSetup($options) { - self::$ajaxSettings = array_merge( - self::$ajaxSettings, - $options - ); - } - public static function ajaxAllowHost($host1, $host2 = null, $host3 = null) { - $loop = is_array($host1) - ? $host1 - : func_get_args(); - foreach($loop as $host) { - if ($host && ! in_array($host, phpQuery::$ajaxAllowedHosts)) { - phpQuery::$ajaxAllowedHosts[] = $host; - } - } - } - public static function ajaxAllowURL($url1, $url2 = null, $url3 = null) { - $loop = is_array($url1) - ? $url1 - : func_get_args(); - foreach($loop as $url) - phpQuery::ajaxAllowHost(parse_url($url, PHP_URL_HOST)); - } - /** - * Returns JSON representation of $data. - * - * @static - * @param mixed $data - * @return string - */ - public static function toJSON($data) { - if (function_exists('json_encode')) - return json_encode($data); - require_once('Zend/Json/Encoder.php'); - return Zend_Json_Encoder::encode($data); - } - /** - * Parses JSON into proper PHP type. - * - * @static - * @param string $json - * @return mixed - */ - public static function parseJSON($json) { - if (function_exists('json_decode')) { - $return = json_decode(trim($json), true); - // json_decode and UTF8 issues - if (isset($return)) - return $return; - } - require_once('Zend/Json/Decoder.php'); - return Zend_Json_Decoder::decode($json); - } - /** - * Returns source's document ID. - * - * @param $source DOMNode|phpQueryObject - * @return string - */ - public static function getDocumentID($source) { - if ($source instanceof DOMDOCUMENT) { - foreach(phpQuery::$documents as $id => $document) { - if ($source->isSameNode($document->document)) - return $id; - } - } else if ($source instanceof DOMNODE) { - foreach(phpQuery::$documents as $id => $document) { - if ($source->ownerDocument->isSameNode($document->document)) - return $id; - } - } else if ($source instanceof phpQueryObject) - return $source->getDocumentID(); - else if (is_string($source) && isset(phpQuery::$documents[$source])) - return $source; - } - /** - * Get DOMDocument object related to $source. - * Returns null if such document doesn't exist. - * - * @param $source DOMNode|phpQueryObject|string - * @return string - */ - public static function getDOMDocument($source) { - if ($source instanceof DOMDOCUMENT) - return $source; - $source = self::getDocumentID($source); - return $source - ? self::$documents[$id]['document'] - : null; - } - - // UTILITIES - // http://docs.jquery.com/Utilities - - /** - * - * @return unknown_type - * @link http://docs.jquery.com/Utilities/jQuery.makeArray - */ - public static function makeArray($obj) { - $array = array(); - if (is_object($object) && $object instanceof DOMNODELIST) { - foreach($object as $value) - $array[] = $value; - } else if (is_object($object) && ! ($object instanceof Iterator)) { - foreach(get_object_vars($object) as $name => $value) - $array[0][$name] = $value; - } else { - foreach($object as $name => $value) - $array[0][$name] = $value; - } - return $array; - } - public static function inArray($value, $array) { - return in_array($value, $array); - } - /** - * - * @param $object - * @param $callback - * @return unknown_type - * @link http://docs.jquery.com/Utilities/jQuery.each - */ - public static function each($object, $callback, $param1 = null, $param2 = null, $param3 = null) { - $paramStructure = null; - if (func_num_args() > 2) { - $paramStructure = func_get_args(); - $paramStructure = array_slice($paramStructure, 2); - } - if (is_object($object) && ! ($object instanceof Iterator)) { - foreach(get_object_vars($object) as $name => $value) - phpQuery::callbackRun($callback, array($name, $value), $paramStructure); - } else { - foreach($object as $name => $value) - phpQuery::callbackRun($callback, array($name, $value), $paramStructure); - } - } - /** - * - * @link http://docs.jquery.com/Utilities/jQuery.map - */ - public static function map($array, $callback, $param1 = null, $param2 = null, $param3 = null) { - $result = array(); - $paramStructure = null; - if (func_num_args() > 2) { - $paramStructure = func_get_args(); - $paramStructure = array_slice($paramStructure, 2); - } - foreach($array as $v) { - $vv = phpQuery::callbackRun($callback, array($v), $paramStructure); -// $callbackArgs = $args; -// foreach($args as $i => $arg) { -// $callbackArgs[$i] = $arg instanceof CallbackParam -// ? $v -// : $arg; -// } -// $vv = call_user_func_array($callback, $callbackArgs); - if (is_array($vv)) { - foreach($vv as $vvv) - $result[] = $vvv; - } else if ($vv !== null) { - $result[] = $vv; - } - } - return $result; - } - /** - * - * @param $callback Callback - * @param $params - * @param $paramStructure - * @return unknown_type - */ - public static function callbackRun($callback, $params = array(), $paramStructure = null) { - if (! $callback) - return; - if ($callback instanceof CallbackParameterToReference) { - // TODO support ParamStructure to select which $param push to reference - if (isset($params[0])) - $callback->callback = $params[0]; - return true; - } - if ($callback instanceof Callback) { - $paramStructure = $callback->params; - $callback = $callback->callback; - } - if (! $paramStructure) - return call_user_func_array($callback, $params); - $p = 0; - foreach($paramStructure as $i => $v) { - $paramStructure[$i] = $v instanceof CallbackParam - ? $params[$p++] - : $v; - } - return call_user_func_array($callback, $paramStructure); - } - /** - * Merge 2 phpQuery objects. - * @param array $one - * @param array $two - * @protected - * @todo node lists, phpQueryObject - */ - public static function merge($one, $two) { - $elements = $one->elements; - foreach($two->elements as $node) { - $exists = false; - foreach($elements as $node2) { - if ($node2->isSameNode($node)) - $exists = true; - } - if (! $exists) - $elements[] = $node; - } - return $elements; -// $one = $one->newInstance(); -// $one->elements = $elements; -// return $one; - } - /** - * - * @param $array - * @param $callback - * @param $invert - * @return unknown_type - * @link http://docs.jquery.com/Utilities/jQuery.grep - */ - public static function grep($array, $callback, $invert = false) { - $result = array(); - foreach($array as $k => $v) { - $r = call_user_func_array($callback, array($v, $k)); - if ($r === !(bool)$invert) - $result[] = $v; - } - return $result; - } - public static function unique($array) { - return array_unique($array); - } - /** - * - * @param $function - * @return unknown_type - * @TODO there are problems with non-static methods, second parameter pass it - * but doesnt verify is method is really callable - */ - public static function isFunction($function) { - return is_callable($function); - } - public static function trim($str) { - return trim($str); - } - /* PLUGINS NAMESPACE */ - /** - * - * @param $url - * @param $callback - * @param $param1 - * @param $param2 - * @param $param3 - * @return phpQueryObject - */ - public static function browserGet($url, $callback, $param1 = null, $param2 = null, $param3 = null) { - if (self::plugin('WebBrowser')) { - $params = func_get_args(); - return self::callbackRun(array(self::$plugins, 'browserGet'), $params); - } else { - self::debug('WebBrowser plugin not available...'); - } - } - /** - * - * @param $url - * @param $data - * @param $callback - * @param $param1 - * @param $param2 - * @param $param3 - * @return phpQueryObject - */ - public static function browserPost($url, $data, $callback, $param1 = null, $param2 = null, $param3 = null) { - if (self::plugin('WebBrowser')) { - $params = func_get_args(); - return self::callbackRun(array(self::$plugins, 'browserPost'), $params); - } else { - self::debug('WebBrowser plugin not available...'); - } - } - /** - * - * @param $ajaxSettings - * @param $callback - * @param $param1 - * @param $param2 - * @param $param3 - * @return phpQueryObject - */ - public static function browser($ajaxSettings, $callback, $param1 = null, $param2 = null, $param3 = null) { - if (self::plugin('WebBrowser')) { - $params = func_get_args(); - return self::callbackRun(array(self::$plugins, 'browser'), $params); - } else { - self::debug('WebBrowser plugin not available...'); - } - } - /** - * - * @param $code - * @return string - */ - public static function php($code) { - return self::code('php', $code); - } - /** - * - * @param $type - * @param $code - * @return string - */ - public static function code($type, $code) { - return "<$type>"; - } - - public static function __callStatic($method, $params) { - return call_user_func_array( - array(phpQuery::$plugins, $method), - $params - ); - } - protected static function dataSetupNode($node, $documentID) { - // search are return if alredy exists - foreach(phpQuery::$documents[$documentID]->dataNodes as $dataNode) { - if ($node->isSameNode($dataNode)) - return $dataNode; - } - // if doesn't, add it - phpQuery::$documents[$documentID]->dataNodes[] = $node; - return $node; - } - protected static function dataRemoveNode($node, $documentID) { - // search are return if alredy exists - foreach(phpQuery::$documents[$documentID]->dataNodes as $k => $dataNode) { - if ($node->isSameNode($dataNode)) { - unset(self::$documents[$documentID]->dataNodes[$k]); - unset(self::$documents[$documentID]->data[ $dataNode->dataID ]); - } - } - } - public static function data($node, $name, $data, $documentID = null) { - if (! $documentID) - // TODO check if this works - $documentID = self::getDocumentID($node); - $document = phpQuery::$documents[$documentID]; - $node = self::dataSetupNode($node, $documentID); - if (! isset($node->dataID)) - $node->dataID = ++phpQuery::$documents[$documentID]->uuid; - $id = $node->dataID; - if (! isset($document->data[$id])) - $document->data[$id] = array(); - if (! is_null($data)) - $document->data[$id][$name] = $data; - if ($name) { - if (isset($document->data[$id][$name])) - return $document->data[$id][$name]; - } else - return $id; - } - public static function removeData($node, $name, $documentID) { - if (! $documentID) - // TODO check if this works - $documentID = self::getDocumentID($node); - $document = phpQuery::$documents[$documentID]; - $node = self::dataSetupNode($node, $documentID); - $id = $node->dataID; - if ($name) { - if (isset($document->data[$id][$name])) - unset($document->data[$id][$name]); - $name = null; - foreach($document->data[$id] as $name) - break; - if (! $name) - self::removeData($node, $name, $documentID); - } else { - self::dataRemoveNode($node, $documentID); - } - } -} -/** - * Plugins static namespace class. - * - * @author Tobiasz Cudnik - * @package phpQuery - * @todo move plugin methods here (as statics) - */ -class phpQueryPlugins { - public function __call($method, $args) { - if (isset(phpQuery::$extendStaticMethods[$method])) { - $return = call_user_func_array( - phpQuery::$extendStaticMethods[$method], - $args - ); - } else if (isset(phpQuery::$pluginsStaticMethods[$method])) { - $class = phpQuery::$pluginsStaticMethods[$method]; - $realClass = "phpQueryPlugin_$class"; - $return = call_user_func_array( - array($realClass, $method), - $args - ); - return isset($return) - ? $return - : $this; - } else - throw new Exception("Method '{$method}' doesnt exist"); - } -} -/** - * Shortcut to phpQuery::pq($arg1, $context) - * Chainable. - * - * @see phpQuery::pq() - * @return phpQueryObject|QueryTemplatesSource|QueryTemplatesParse|QueryTemplatesSourceQuery - * @author Tobiasz Cudnik - * @package phpQuery - */ -function pq($arg1, $context = null) { - $args = func_get_args(); - return call_user_func_array( - array('phpQuery', 'pq'), - $args - ); -} -// add plugins dir and Zend framework to include path -set_include_path( - get_include_path() - .PATH_SEPARATOR.dirname(__FILE__).'/phpQuery/' - .PATH_SEPARATOR.dirname(__FILE__).'/phpQuery/plugins/' -); -// why ? no __call nor __get for statics in php... -// XXX __callStatic will be available in PHP 5.3 -phpQuery::$plugins = new phpQueryPlugins(); -// include bootstrap file (personal library config) -if (file_exists(dirname(__FILE__).'/phpQuery/bootstrap.php')) - require_once dirname(__FILE__).'/phpQuery/bootstrap.php'; diff --git a/vendor/owner888/phpspider/library/rolling_curl.php b/vendor/owner888/phpspider/library/rolling_curl.php deleted file mode 100644 index fd8b81f..0000000 --- a/vendor/owner888/phpspider/library/rolling_curl.php +++ /dev/null @@ -1,466 +0,0 @@ - - * @copyright seatle - * @link http://www.epooll.com/ - * @license http://www.opensource.org/licenses/mit-license.php MIT License - */ - -class rolling_curl -{ - /** - * @var float - * - * 同时运行任务数 - * 例如:有8个请求,则会被分成两批,第一批5个请求,第二批3个请求 - * 注意:采集知乎的时候,5个是比较稳定的,7个以上就开始会超时了,多进程就没有这样的问题,因为多进程很少几率会发生并发 - */ - public $window_size = 5; - - /** - * @var float - * - * Timeout is the timeout used for curl_multi_select. - */ - private $timeout = 10; - - /** - * @var string|array - * - * 应用在每个请求的回调函数 - */ - public $callback; - - /** - * @var array - * - * 设置默认的请求参数 - */ - protected $options = array( - CURLOPT_SSL_VERIFYPEER => 0, - CURLOPT_RETURNTRANSFER => 1, - // 注意:TIMEOUT = CONNECTTIMEOUT + 数据获取时间,所以 TIMEOUT 一定要大于 CONNECTTIMEOUT,否则 CONNECTTIMEOUT 设置了就没意义 - // "Connection timed out after 30001 milliseconds" - CURLOPT_CONNECTTIMEOUT => 30, - CURLOPT_TIMEOUT => 60, - CURLOPT_RETURNTRANSFER => 1, - CURLOPT_HEADER => 0, - // 在多线程处理场景下使用超时选项时,会忽略signals对应的处理函数,但是无耐的是还有小概率的crash情况发生 - CURLOPT_NOSIGNAL => 1, - CURLOPT_USERAGENT => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36", - ); - - /** - * @var array - */ - private $headers = array(); - - /** - * @var Request[] - * - * 请求队列 - */ - private $requests = array(); - - /** - * @var RequestMap[] - * - * Maps handles to request indexes - */ - private $requestMap = array(); - - public function __construct() - { - } - - /** - * set timeout - * - * @param init $timeout - * @return - */ - public function set_timeout($timeout) - { - $this->options[CURLOPT_TIMEOUT] = $timeout; - } - - /** - * set proxy - * - */ - public function set_proxy($proxy) - { - $this->options[CURLOPT_PROXY] = $proxy; - } - - /** - * set referer - * - */ - public function set_referer($referer) - { - $this->options[CURLOPT_REFERER] = $referer; - } - - /** - * 设置 user_agent - * - * @param string $useragent - * @return void - */ - public function set_useragent($useragent) - { - $this->options[CURLOPT_USERAGENT] = $useragent; - } - - /** - * 设置COOKIE - * - * @param string $cookie - * @return void - */ - public function set_cookie($cookie) - { - $this->options[CURLOPT_COOKIE] = $cookie; - } - - /** - * 设置COOKIE JAR - * - * @param string $cookie_jar - * @return void - */ - public function set_cookiejar($cookiejar) - { - $this->options[CURLOPT_COOKIEJAR] = $cookiejar; - } - - /** - * 设置COOKIE FILE - * - * @param string $cookie_file - * @return void - */ - public function set_cookiefile($cookiefile) - { - $this->options[CURLOPT_COOKIEFILE] = $cookiefile; - } - - /** - * 获取内容的时候是不是连header也一起获取 - * - * @param mixed $http_raw - * @return void - * @author seatle - * @created time :2016-09-18 10:17 - */ - public function set_http_raw($http_raw = false) - { - $this->options[CURLOPT_HEADER] = $http_raw; - } - - /** - * 设置IP - * - * @param string $ip - * @return void - */ - public function set_ip($ip) - { - $headers = array( - 'CLIENT-IP'=>$ip, - 'X-FORWARDED-FOR'=>$ip, - ); - $this->headers = $this->headers + $headers; - } - - /** - * 设置Headers - * - * @param string $headers - * @return void - */ - public function set_headers($headers) - { - $this->headers = $this->headers + $headers; - } - - /** - * 设置Hosts - * - * @param string $hosts - * @return void - */ - public function set_hosts($hosts) - { - $headers = array( - 'Host'=>$hosts, - ); - $this->headers = $this->headers + $headers; - } - - /** - * 设置Gzip - * - * @param string $hosts - * @return void - */ - public function set_gzip($gzip) - { - if ($gzip) - { - $this->options[CURLOPT_ENCODING] = 'gzip'; - } - } - - public function request($url, $method = "GET", $fields = array(), $headers = array(), $options = array()) - { - $this->requests[] = array('url'=>$url,'method'=>$method,'fields'=>$fields,'headers'=>$headers,'options'=>$options); - return true; - } - - public function get_options($request) - { - $options = $this->options; - $headers = $this->headers; - - if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) - { - $options[CURLOPT_FOLLOWLOCATION] = 1; - $options[CURLOPT_MAXREDIRS] = 5; - } - - // 如果是 get 方式,直接拼凑一个 url 出来 - if (strtolower($request['method']) == 'get' && !empty($request['fields'])) - { - $url = $request['url'] . "?" . http_build_query($request['fields']); - } - // 如果是 post 方式 - if (strtolower($request['method']) == 'post') - { - $options[CURLOPT_POST] = 1; - $options[CURLOPT_POSTFIELDS] = $request['fields']; - } - - // append custom options for this specific request - if ($request['options']) - { - $options = $request['options'] + $options; - } - - if ($request['headers']) - { - $headers = $request['headers'] + $headers; - } - - // 随机绑定 hosts,做负载均衡 - //if (self::$hosts) - //{ - //$parse_url = parse_url($url); - //$host = $parse_url['host']; - //$key = rand(0, count(self::$hosts)-1); - //$ip = self::$hosts[$key]; - //$url = str_replace($host, $ip, $url); - //self::$headers = array_merge( array('Host:'.$host), self::$headers ); - //} - - // header 要这样拼凑 - $headers_tmp = array(); - foreach ($headers as $k=>$v) - { - $headers_tmp[] = $k.":".$v; - } - $headers = $headers_tmp; - - $options[CURLOPT_URL] = $request['url']; - $options[CURLOPT_HTTPHEADER] = $headers; - - return $options; - } - - /** - * GET 请求 - * - * @param string $url - * @param array $headers - * @param array $options - * @return bool - */ - public function get($url, $fields = array(), $headers = array(), $options = array()) - { - return $this->request($url, 'get', $fields, $headers, $options); - } - - /** - * $fields 有三种类型:1、数组;2、http query;3、json - * 1、array('name'=>'yangzetao') 2、http_build_query(array('name'=>'yangzetao')) 3、json_encode(array('name'=>'yangzetao')) - * 前两种是普通的post,可以用$_POST方式获取 - * 第三种是post stream( json rpc,其实就是webservice ),虽然是post方式,但是只能用流方式 http://input 后者 $HTTP_RAW_POST_DATA 获取 - * - * @param string $url - * @param array $fields - * @param array $headers - * @param array $options - * @return void - */ - public function post($url, $fields = array(), $headers = array(), $options = array()) - { - return $this->request($url, 'post', $fields, $headers, $options); - } - - /** - * Execute processing - * - * @param int $window_size Max number of simultaneous connections - * @return string|bool - */ - public function execute($window_size = null) - { - $count = sizeof($this->requests); - if ($count == 0) - { - return false; - } - // 只有一个请求 - elseif ($count == 1) - { - return $this->single_curl(); - } - else - { - // 开始 rolling curl,window_size 是最大同时连接数 - return $this->rolling_curl($window_size); - } - } - - private function single_curl() - { - $ch = curl_init(); - // 从请求队列里面弹出一个来 - $request = array_shift($this->requests); - $options = $this->get_options($request); - curl_setopt_array($ch, $options); - $output = curl_exec($ch); - $info = curl_getinfo($ch); - $error = null; - if ($output === false) - { - $error = curl_error( $ch ); - } - //$output = substr($output, 10); - //$output = gzinflate($output); - - // 其实一个请求的时候没是么必要回调,直接返回数据就好了,不过这里算是多一个功能吧,和多请求保持一样的操作 - if ($this->callback) - { - if (is_callable($this->callback)) - { - call_user_func($this->callback, $output, $info, $request, $error); - } - } - else - { - return $output; - } - return true; - } - - private function rolling_curl($window_size = null) - { - // 如何设置了最大任务数 - if ($window_size) - $this->window_size = $window_size; - - // 如果请求数 小于 任务数,设置任务数为请求数 - if (sizeof($this->requests) < $this->window_size) - $this->window_size = sizeof($this->requests); - - // 如果任务数小于2个,不应该用这个方法的,用上面的single_curl方法就好了 - if ($this->window_size < 2) - exit("Window size must be greater than 1"); - - // 初始化任务队列 - $master = curl_multi_init(); - - // 开始第一批请求 - for ($i = 0; $i < $this->window_size; $i++) - { - $ch = curl_init(); - $options = $this->get_options($this->requests[$i]); - curl_setopt_array($ch, $options); - curl_multi_add_handle($master, $ch); - // 添加到请求数组 - $key = (string) $ch; - $this->requestMap[$key] = $i; - } - - do { - while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ; - - // 如果 - if ($execrun != CURLM_OK) { break; } - - // 一旦有一个请求完成,找出来,因为curl底层是select,所以最大受限于1024 - while ($done = curl_multi_info_read($master)) - { - // 从请求中获取信息、内容、错误 - $info = curl_getinfo($done['handle']); - $output = curl_multi_getcontent($done['handle']); - $error = curl_error($done['handle']); - - // 如果绑定了回调函数 - $callback = $this->callback; - if (is_callable($callback)) - { - $key = (string) $done['handle']; - $request = $this->requests[$this->requestMap[$key]]; - unset($this->requestMap[$key]); - call_user_func($callback, $output, $info, $request, $error); - } - - // 一个请求完了,就加一个进来,一直保证5个任务同时进行 - if ($i < sizeof($this->requests) && isset($this->requests[$i]) && $i < count($this->requests)) - { - $ch = curl_init(); - $options = $this->get_options($this->requests[$i]); - curl_setopt_array($ch, $options); - curl_multi_add_handle($master, $ch); - - // 添加到请求数组 - $key = (string) $ch; - $this->requestMap[$key] = $i; - $i++; - } - // 把请求已经完成了得 curl handle 删除 - curl_multi_remove_handle($master, $done['handle']); - } - - // 当没有数据的时候进行堵塞,把 CPU 使用权交出来,避免上面 do 死循环空跑数据导致 CPU 100% - if ($running) - { - curl_multi_select($master, $this->timeout); - } - - } while ($running); - // 关闭任务 - curl_multi_close($master); - - // 把请求清空,否则没有重新 new rolling_curl(); 直接再次导入一批url的时候,就会把前面已经执行过的url又执行一轮 - unset($this->requests); - return true; - } - - /** - * @return void - */ - public function __destruct() - { - unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests); - } -} diff --git a/vendor/owner888/phpspider/test.php b/vendor/owner888/phpspider/test.php deleted file mode 100644 index 79e7d24..0000000 --- a/vendor/owner888/phpspider/test.php +++ /dev/null @@ -1,7 +0,0 @@ -addServer('10.10.10.238'); -$gmworker->addFunction("reverse", "reverse_fn"); - -print "Waiting for job...\n"; -while($gmworker->work()) -{ - if ($gmworker->returnCode() != GEARMAN_SUCCESS) - { - echo "return_code: " . $gmworker->returnCode() . "\n"; - break; - } - //break; -} - -function reverse_fn($job) -{ - sleep(3); - echo $job->workload()."\n"; - return strrev($job->workload()); -} - - -echo "hello\n"; -?> - - - diff --git a/wiki/Home.md b/wiki/Home.md deleted file mode 100644 index 2c41468..0000000 --- a/wiki/Home.md +++ /dev/null @@ -1,21 +0,0 @@ -## 功能 -IYUU自动辅种工具,目前能对国内大部分的PT站点自动辅种;支持下载器集群,支持多盘位,支持多下载目录,支持远程连接等。 - -## 原理 -IYUU自动辅种工具(英文名:iyuuAutoReseed),是一款PHP语言编写的Private Tracker辅种脚本,通过计划任务或常驻内存,按指定频率调用transmission、qBittorrent下载软件的API接口,提取正在做种的info_hash提交到服务器API接口,根据API接口返回的数据拼接种子连接,提交给下载器,自动辅种各个站点。 - -## 运行环境 -所有具备PHP运行环境的所有平台! -例如:Linux、Windows、MacOS - -## 需求提交/错误反馈 - - 点击链接加入群聊【IYUU自动辅种交流】:[https://jq.qq.com/?_wv=1027&k=5JOfOlM][1] - -## 捐助开发者 -如果觉得我的付出,节约了您的宝贵时间,请随意打赏一杯咖啡!或者一杯水! -您所有的打赏将用于服务器续期,增加服务的延续性。 -![微信打赏.png][2] - - -[1]: https://jq.qq.com/?_wv=1027&k=5JOfOlM -[2]: https://www.iyuu.cn/usr/uploads/2019/12/801558607.png \ No newline at end of file diff --git a/wiki/合作站点鉴权配置.md b/wiki/合作站点鉴权配置.md deleted file mode 100644 index 4360b81..0000000 --- a/wiki/合作站点鉴权配置.md +++ /dev/null @@ -1,27 +0,0 @@ -## 重点讲解Ourbits站点的鉴权配置 -博客链接:https://www.iyuu.cn/archives/337/ -IYUU自动辅种工具、Ourbits双方达成合作,可以对使用接口的用户,实现认证。 -### 申请爱语飞飞微信通知token,新用户访问:http://iyuu.cn 申请! -1.点击`开始使用`,出现二维码,用`微信扫码` -![微信通知1.png][1] -![微信通知2.png][2] -![微信通知3.png][3] -2.复制您的token令牌到`/app/config/config.php`文件内的`iyuu.cn`对应的配置字段,保存。如图: -![微信通知4.png][4] - -### 设置Ourbits: -![编辑配置4.png][5] -`passkey`,在你的控制面板 - 密钥 -`is_vip`,根据你的实际情况填写,因站点有下载种子的流控,如果你不在限制之列,可以`设置为1` -`id`,为用户中心打开后,浏览器地址栏**http://xxxxx.xxx/userdetails.php?id=`46880`**等号=后面的几个数字,如图: -![编辑配置6.png][6] - -到此,配置文件编辑完毕,请记得保存。 -如果提示保存格式,请保存为UTF8(无BOM)格式。 - - [1]: https://www.iyuu.cn/usr/uploads/2019/12/2331433923.png - [2]: https://www.iyuu.cn/usr/uploads/2019/12/3324442680.png - [3]: https://www.iyuu.cn/usr/uploads/2019/12/3181272964.png - [4]: https://www.iyuu.cn/usr/uploads/2019/12/3669828008.png - [5]: https://www.iyuu.cn/usr/uploads/2019/12/3696916642.png - [6]: https://www.iyuu.cn/usr/uploads/2019/12/1230288911.png \ No newline at end of file diff --git a/wiki/命令汇总.md b/wiki/命令汇总.md deleted file mode 100644 index dd99eb6..0000000 --- a/wiki/命令汇总.md +++ /dev/null @@ -1,10 +0,0 @@ -## 【特别提示】 -php命令与脚本路径之间是有个空格,请注意!请注意!请注意! - - -## IYUU自动辅种命令: -`php ./iyuu.cn.php` - - - -### 【重要说明:实际路径,以你实际的为准,切勿生搬硬套!】 \ No newline at end of file diff --git a/wiki/常见问题.md b/wiki/常见问题.md deleted file mode 100644 index 1e00071..0000000 --- a/wiki/常见问题.md +++ /dev/null @@ -1,83 +0,0 @@ -## 常见问题FAQ - -#### 问:这款脚本会不会泄露我的秘钥、cookie、客户端连接密码? - -答:绝对不会!!代码全开源,能经受审查!所有私密配置只在本地存储使用,绝不会发送给任何第三方。 - -#### 问:只使用IYUU自动辅种,需要配置各站的cookie吗? - -答:只需配置全局客户端和各网站的passkey密钥(没有配置passkey的站点,在辅种时候会跳过)。2019年12月28日补充:辅种hdcity、hdchina需要配置cookie。 - -#### 问:IYUU自动辅种工具,向服务器发送了什么实现自动辅种呢? - -答:1.文件`phpspider\app\torrent\cache\hashString.txt`是脚本发送给服务器的数据,是按下载器分组的种子info_hash;2.文件`phpspider\app\torrent\cache\reseed.txt`是服务器返回的可辅种数据。 - -#### 问:本次添加成功的辅种任务,下次辅种时还会重复添加吗? - -答:添加成功的辅种任务,会在本地生成缓存记录,避免重复添加辅种任务,路径在:`phpspider\app\torrent\cachehash`。 - -#### 问:为什么有些站点自动跳过? - -答:因为站点在下载种子时有流控或者人机验证,会导致辅种失败;但脚本会在`phpspider\app\torrent\cache`目录下生成以站点命名的手动辅种文本。 - -#### 问:我拥有辅种时自动跳过站点的特殊权限,如何设置为可以辅种呢? - -答:在站点的独立配置区域,添加一行代码`'is_vip' => 1,`即可。例如Ourbits: - -```php -// ourbits -'ourbits' => array( - // 如果需要用下载免费种脚本,须配置(只是自动辅种,可以不配置此项) - 'cookie' => '', - // 如果需要自动辅种,必须配置 - 'passkey' => '', - 'id' => 46880, // 用户ID - 'is_vip' => 1, // 是否具有VIP或特殊权限?0 普通,1 VIP -), -``` - - - -#### 问:如何升级到最新版本? - -答:从github或码云仓库,下载最新的源码,覆盖到本地即可。 - -#### 问:为啥我编辑配置后,运行的时候显示乱码? - -答:保存的编码格式不对,正确的格式为UTF8(无BOM);推荐编辑器:`VS code`、`EditPlu`s、`SublimeText`、`Notepad++`。 - -#### 问:为什么用IYUU自动辅种,有些种子无法校验通过? - -答:首先,这个属于正常现象。 只要IYUU自动辅种匹配过来,然后校验通不过的,分为以下几种情况: - -1、被改了文件名,重新做种 - -2、被改了顶层目录名,重新做种 - -3、把单文件放进了目录里面,重新做种 - -4、更改了部分文件,例如nfo文件,重新做种 - -一般情况下,通过分析种子结构,创建软连接,90%以上都可以辅种成功。 -也可以不管他或删除校验失败的任务(不要删除数据)。 - -#### 问:如何创建软连接、硬链接手动辅种? - -答:Windows命令: `mklink`, Linux命令: `ln -s`, 更详细的用法请百度。 - -#### 问:IYUU自动辅种,添加计划任务后多久运行一次比较合适? - -答:为减轻服务器压力,推荐间隔3小时以上(太频繁的调用接口,可能被封禁)。 - -#### 问:猫站的Tracker为啥是http,而不是https? - -答:请退出登录,在登录时勾选下面两个SSL的选项,登录后复制cookie,重新配置。 - -#### 问:如何反馈问题? - -答:1、点击链接加入群聊【IYUU自动辅种交流】:[https://jq.qq.com/?_wv=1027&k=5JOfOlM][1] - -2、QQ群:859882209 - -3、issues: https://gitee.com/ledc/IYUUAutoReseed/issues - diff --git a/wiki/开发计划.md b/wiki/开发计划.md deleted file mode 100644 index 08d45c1..0000000 --- a/wiki/开发计划.md +++ /dev/null @@ -1,15 +0,0 @@ -## 开发计划 -| 功能 | 开发状态 | 预计开发时间 | 开发完成时间 | -| - | :-: | ---- | ---- | -| 微信鉴权 | 已完成 | 2019年12月22日 | 2019年12月23日 | -| 流控站点,手动辅种 | 已完成 | 2019年12月24日 | 2019年12月24日 | -| m-team IPv4、IPv6自定义配置 | 已完成 | 2019年12月25日 | 2019年12月25日 | -| 未配置客户端智能过滤 | 已完成 | 2019年12月25日 | 2019年12月25日 | -| 自动辅种结束微信通知 | 已完成 | 2019年12月25日 | 2019年12月27日 | -| 做种客户端间转移 | 已完成 | 2019年12月25日 | 2020年1月14日 | -| 手动辅种按目录分组 | 已完成 | 2019年12月26日 | 2020年1月14日 | -| WEB页面生成配置 | 暂未开始 | | | -| 自动转移客户端 | 暂未开始 | | | -| 脚本docker容器化 | 暂未开始 | | | -| 浏览器插件 | 暂未开始 | | | -| 合集自动拆包辅种 | 暂未开始 | | | diff --git a/wiki/数据同步.md b/wiki/数据同步.md deleted file mode 100644 index ee4d902..0000000 --- a/wiki/数据同步.md +++ /dev/null @@ -1,5 +0,0 @@ -## 辅种数据最后同步时间 -|站点 | 同步时间 | 状态 | -| - | :-: | ---- | -| ourbits | 2020年1月15日13:56:22 | 同步中... | -| hdsky | 2020年1月15日20:49:33 | 同步中... | diff --git a/wiki/更新历史.md b/wiki/更新历史.md deleted file mode 100644 index b1c6895..0000000 --- a/wiki/更新历史.md +++ /dev/null @@ -1,109 +0,0 @@ -### 2020年1月14日 - -更新hdbug域名,删除下载免费种冗余文件。 - -### 2020年1月10日 - -修复:qBittorrent打开自动管理时,自动辅种目录对应错误的问题。 - -### 2020年1月9日 - -优化:萌猫tracker的IP类型改为可配置; -优化:自动辅种时添加的任务,校验后自动暂停(无需更改全局)。 - -### 2020年1月5日 - -修复:城市cuhash变化无法辅种的问题 - -### 2020年1月1日 - -新增:scg - -修复: - -1.转移客户端做种支持磁力链 - -2.萌猫抓取问题 - -3.瓷器抓取标题的问题 - -### 2019年12月27日 - -新增功能:自动辅种结束,微信通知统计信息,优化城市适配,新增discfan(GZT)。 - -### 2019年12月25日 - -1.新增支持upxin(HDU)、oshen - ------- - -### 2019年12月25日 -1.馒头支持ipv4、ipv6选择 -2.未配置的全局客户端智能过滤,不会再影响自动辅种 - ------- - -### 2019年12月24日 -新增hdstreet、joyhd、u2 - ------- - -### 2019年12月23日 -鉴权模式上线试运行 - ------- - -### 2019年12月21日 - -新增兽站、opencd、hdbug; - ------- - -### 2019年12月20日 - -新增1ptba、hdtime - ------- - -### 2019年12月17日 - -新增站点瓷器; - ------- - -### 2019年12月16日 - -新增leaguehd、聆音; - ------- - -### 2019年12月15日 - -1.自动辅种20个站; -2.支持qBittorrent做种转transmission -3.新增qBittorrent自动辅种时的状态过滤,只辅种已完成的种子 - ------- - -### 2019年12月12日 - -目前支持17个站点的自动辅种; -目前支持18个站点下载免费种; - ------- - -### 2019年12月10日 - -自动辅种工具完成! - ------- - -### 2019年11月19日 - -我堡、天空 完美适配,支持大小、做种数、下载数筛选。 - ------- - -技术讨论及后续更新,请加入QQ群! -**群名称:IYUU自动辅种交流** -**QQ群号:859882209** \ No newline at end of file diff --git a/wiki/自动辅种最简配置(windows篇).md b/wiki/自动辅种最简配置(windows篇).md deleted file mode 100644 index 6dda3bd..0000000 --- a/wiki/自动辅种最简配置(windows篇).md +++ /dev/null @@ -1,102 +0,0 @@ -以下教程以windows为基础进行讲解,其他系统同理。 -博客链接:https://www.iyuu.cn/archives/324/ -## 第一步 下载压缩包 -从[码云仓库][1],下载最新源码,解压缩到D盘的根目录下。 - -## 第二步 复制一份配置文件 -打开`D:\IYUUAutoReseed\app\config`目录,复制一份`config.sample.php`,另存为`config.php`。 - -这样操作后,需要升级新版本时,直接覆盖即可,不会影响到配置。 - -## 第三步 编辑配置文件 -提醒:千万不要用windows记事本来编辑配置文件(会导致乱码)!! -推荐编辑软件:`VS code`、`EditPlus`、`SublimeText`、`Notepad++`等(保存格式,选UTF8 无BOM); -配置文件内容较多,新手往往很迷茫,不知道改哪里,在这里我重点强调2个步骤: -`1.编辑全局客户端; 2.编辑各站的秘钥,即passkey。` - -其他配置,如果不懂也没有关系;先保持默认,等脚本运行起来,再修改也不迟。另外,修改时一定要细心,仔细看教程。 -打开`D:\IYUUAutoReseed\app\config\config.php`文件,如下图: -![编辑配置1.png][2] - -### 填写全局客户端 -上图红框内的是`transmission`的示例配置,绿框是`qBittorrent`的示例配置; -IYUU自动辅种工具,目前支持这两种下载器,支持多盘位,辅种时全自动对应资源的下载目录。 -1,编辑`transmission`下载器 -`http://127.0.0.1:9091/transmission/rpc`是下载器的连接参数,你要修改的部分是`127.0.0.1:9091`改成你的IP与端口(本机使用无需修改),局域网内的机器请填写局域网IP与端口;远程使用请填写DDNS的远程连接域名与端口。 -username是用户名、password是密码。 -如果你没有用到`transmission`下载器,请把红框的内容都删除。 - -2,编辑`qBittorrent`下载器 -方法与上一步相同,只需填写ip、端口、用户名、密码即可。如果您是windows下的qBittorrent,请参考下图打开`WEB用户界面`: -![qb设置WEB用户界面.png][3] - -因为我两个下载器都在用,编辑好后,如图: -![编辑配置2.png][4] - -### 填写各站秘钥passkey -IYUU自动辅种:需要您配置各站的passkey(没有配置passkey的站点会自动跳过)。 -从各站点的控制面板,找到您的`秘钥`复制粘贴过来即可。 -配置好后如图: -![编辑配置3.png][5] - ----------- - - -## 第四步,重点讲解Ourbits站点的配置 -IYUU自动辅种工具、Ourbits双方达成合作,可以对使用接口的用户,实现认证。 -### 申请爱语飞飞微信通知token,新用户访问:http://iyuu.cn 申请! -1.点击`开始使用`,出现二维码,用`微信扫码` -![微信通知1.png][6] -![微信通知2.png][7] -![微信通知3.png][8] -2.复制您的token令牌到`/app/config/config.php`文件内的`iyuu.cn`对应的配置字段,保存。如图: -![微信通知4.png][9] - -### 设置Ourbits: -![编辑配置4.png][10] -`passkey`,在你的控制面板 - 密钥 -`is_vip`,根据你的实际情况填写,因站点有下载种子的流控,如果你不在限制之列,可以`设置为1` -`id`,为用户中心打开后,浏览器地址栏**http://xxxxx.xxx/userdetails.php?id=`46880`**等号=后面的几个数字,如图: -![编辑配置6.png][11] - -到此,配置文件编辑完毕,请记得保存。 -如果提示保存格式,请保存为UTF8(无BOM)格式。 - ------- - -## 群晖、铁威马、威联通等Linux环境 - -经过上面步骤,其实已经完成了配置,只需要把脚本复制到设备内,用php命令运行脚本即可。 - -群晖php命令:`php` - -威联通php命令:`/mnt/ext/opt/apache/bin/php` - -铁威马php命令:`php` - ----------- - -## Windows安装PHP运行环境 -也可以去官方下载【https://www.php.net/downloads】,官方下载的记得开启`curl、fileinfo、mbstring`,这3个扩展。 -另外我打包了一份,下载地址: -微云链接:https://share.weiyun.com/5EiXLfn 密码:ezsvnb -下载回来是一个ZIP压缩包,解压到`D:\IYUUAutoReseed\`目录内,文件结构如图: -![编辑配置7.png][12] -点击红框内`执行辅种`即可。 -如果你前期严格按照配置一步步操作,这里会正常显示跑动的辅种列表。正常如图: -![编辑配置8.png][13] - - -[1]: https://gitee.com/ledc/IYUUAutoReseed -[2]: https://www.iyuu.cn/usr/uploads/2019/12/2720183833.png -[3]: https://www.iyuu.cn/usr/uploads/2019/12/405587689.png -[4]: https://www.iyuu.cn/usr/uploads/2019/12/441257656.png -[5]: https://www.iyuu.cn/usr/uploads/2019/12/890327305.png -[6]: https://www.iyuu.cn/usr/uploads/2019/12/2331433923.png -[7]: https://www.iyuu.cn/usr/uploads/2019/12/3324442680.png -[8]: https://www.iyuu.cn/usr/uploads/2019/12/3181272964.png -[9]: https://www.iyuu.cn/usr/uploads/2019/12/3669828008.png -[10]: https://www.iyuu.cn/usr/uploads/2019/12/3696916642.png -[11]: https://www.iyuu.cn/usr/uploads/2019/12/1230288911.png -[12]: https://www.iyuu.cn/usr/uploads/2019/12/3189986236.png -[13]: https://www.iyuu.cn/usr/uploads/2019/12/2523845772.png \ No newline at end of file