You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
IYUUAutoReseed/vendor/owner888/phpspider/core/util.php

936 lines
27 KiB

<?php
// +----------------------------------------------------------------------
// | PHPSpider [ A PHP Framework For Crawler ]
// +----------------------------------------------------------------------
// | Copyright (c) 2006-2014 https://doc.phpspider.org All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: Seatle Yang <seatle@foxmail.com>
// +----------------------------------------------------------------------
//----------------------------------
// PHPSpider实用函数集合类文件
//----------------------------------
namespace phpspider\core;
// 引入PATH_DATA
require_once __DIR__ . '/constants.php';
class util
{
/**
* 文件锁
* 如果没有锁,就加一把锁并且执行逻辑,然后删除锁
* if (!util::lock('statistics_offer'))
* {
* util::lock('statistics_offer');
* ...
* util::unlock('statistics_offer');
* }
* 否则输出锁存在
* else
* {
* echo "process has been locked\n";
* }
*
* @param mixed $lock_name
* @param int $lock_timeout
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-02-18 14:28
*/
public static function lock($lock_name, $lock_timeout = 600)
{
$lock = util::get_file(PATH_DATA."/lock/{$lock_name}.lock");
if ($lock)
{
$time = time() - $lock;
// 还没到10分钟,说明进程还活着
if ($time < $lock_timeout)
{
return true;
}
unlink(PATH_DATA."/lock/{$lock_name}.lock");
}
util::put_file(PATH_DATA."/lock/{$lock_name}.lock", time());
return false;
}
public static function unlock($lock_name)
{
unlink(PATH_DATA."/lock/{$lock_name}.lock");
}
public static function time2second($time, $is_log = true)
{
if(is_numeric($time))
{
$value = array(
"years" => 0, "days" => 0, "hours" => 0,
"minutes" => 0, "seconds" => 0,
);
if($time >= 31556926)
{
$value["years"] = floor($time/31556926);
$time = ($time%31556926);
}
if($time >= 86400)
{
$value["days"] = floor($time/86400);
$time = ($time%86400);
}
if($time >= 3600)
{
$value["hours"] = floor($time/3600);
$time = ($time%3600);
}
if($time >= 60)
{
$value["minutes"] = floor($time/60);
$time = ($time%60);
}
$value["seconds"] = floor($time);
//return (array) $value;
//$t = $value["years"] ."y ". $value["days"] ."d ". $value["hours"] ."h ". $value["minutes"] ."m ".$value["seconds"]."s";
if ($is_log)
{
$t = $value["days"] ."d ". $value["hours"] ."h ". $value["minutes"] ."m ".$value["seconds"]."s";
}
else
{
$t = $value["days"] ." days ". $value["hours"] ." hours ". $value["minutes"] ." minutes";
}
return $t;
}
else
{
return false;
}
}
public static function get_days($day_sta, $day_end = true, $range = 86400)
{
if ($day_end === true) $day_end = date('Y-m-d');
return array_map(function ($time) {
return date('Y-m-d', $time);
}, range(strtotime($day_sta), strtotime($day_end), $range));
}
/**
* 获取文件行数
*
* @param mixed $filepath
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-03-31 21:54
*/
public static function get_file_line($filepath)
{
$line = 0 ;
$fp = fopen($filepath , 'r');
if (!$fp)
{
return 0;
}
//获取文件的一行内容,注意:需要php5才支持该函数;
while( stream_get_line($fp,8192,"\n") ){
$line++;
}
fclose($fp);//关闭文件
return $line;
}
/**
* 获得表数
*
* @param mixed $table_name 表名
* @param mixed $item_value 唯一索引
* @param int $table_num 表数量
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2015-10-22 23:25
*/
public static function get_table_num($item_value, $table_num = 100)
{
//sha1:返回一个40字符长度的16进制数字
$item_value = sha1(strtolower($item_value));
//base_convert:进制建转换,下面是把16进制转成10进制,方便做除法运算
//str_pad:把字符串填充为指定的长度,下面是在左边加0,表数量大于100就3位,否则2位
$step = $table_num > 100 ? 3 : 2;
$item_value = str_pad(base_convert(substr($item_value, -2), 16, 10) % $table_num, $step, "0", STR_PAD_LEFT);
return $item_value;
}
/**
* 获得表面
*
* @param mixed $table_name 表名
* @param mixed $item_value 唯一索引
* @param int $table_num 表数量
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2015-10-22 23:25
*/
public static function get_table_name($table_name, $item_value, $table_num = 100)
{
//sha1:返回一个40字符长度的16进制数字
$item_value = sha1(strtolower($item_value));
//base_convert:进制建转换,下面是把16进制转成10进制,方便做除法运算
//str_pad:把字符串填充为指定的长度,下面是在左边加0,共3位
$step = $table_num > 100 ? 3 : 2;
$item_value = str_pad(base_convert(substr($item_value, -2), 16, 10) % $table_num, $step, "0", STR_PAD_LEFT);
return $table_name."_".$item_value;
}
// 获得当前使用内存
public static function memory_get_usage()
{
$memory = memory_get_usage();
return self::format_bytes($memory);
}
// 获得最高使用内存
public static function memory_get_peak_usage()
{
$memory = memory_get_peak_usage();
return self::format_bytes($memory);
}
// 转换大小单位
public static function format_bytes($size)
{
$unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');
return @round($size / pow(1024, ($i = floor(log($size, 1024)))), 2) . ' ' . $unit[$i];
}
/**
* 获取数组大小
*
* @param mixed $arr 数组
* @return string
*/
public static function array_size($arr)
{
ob_start();
print_r($arr);
$mem = ob_get_contents();
ob_end_clean();
$mem = preg_replace("/\n +/", "", $mem);
$mem = strlen($mem);
return self::format_bytes($mem);
}
/**
* 数字随机数
*
* @param int $num
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-09-18 10:17
*/
public static function rand_num($num = 7)
{
$rand = "";
for ($i = 0; $i < $num; $i ++)
{
$rand .= mt_rand(0, 9);
}
return $rand;
}
/**
* 字母数字混合随机数
*
* @param int $num
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-09-18 10:17
*/
public static function rand_str($num = 10)
{
$chars = 'abcdefghijklmnopqrstuvwxyz0123456789';
$string = "";
for ($i = 0; $i < $num; $i ++)
{
$string .= substr($chars, rand(0, strlen($chars)), 1);
}
return $string;
}
/**
* 汉字转拼音
*
* @param mixed $str 汉字
* @param int $ishead
* @param int $isclose
* @static
* @access public
* @return string
*/
public static function pinyin($str, $ishead = 0, $isclose = 1)
{
// $str = iconv("utf-8", "gbk//ignore", $str);
$str = mb_convert_encoding($str, "gbk", "utf-8");
global $pinyins;
$restr = '';
$str = trim($str);
$slen = strlen($str);
if ($slen < 2)
{
return $str;
}
if (count($pinyins) == 0)
{
$fp = fopen(PATH_DATA . '/pinyin.dat', 'r');
while (!feof($fp))
{
$line = trim(fgets($fp));
$pinyins[$line[0] . $line[1]] = substr($line, 3, strlen($line) - 3);
}
fclose($fp);
}
for ($i = 0; $i < $slen; $i ++)
{
if (ord($str[$i]) > 0x80)
{
$c = $str[$i] . $str[$i + 1];
$i ++;
if (isset($pinyins[$c]))
{
if ($ishead == 0)
{
$restr .= $pinyins[$c];
}
else
{
$restr .= $pinyins[$c][0];
}
}
else
{
// $restr .= "_";
}
}
else if (preg_match("/[a-z0-9]/i", $str[$i]))
{
$restr .= $str[$i];
}
else
{
// $restr .= "_";
}
}
if ($isclose == 0)
{
unset($pinyins);
}
return $restr;
}
/**
* 生成字母前缀
*
* @param mixed $s0
* @return char
* @author seatle <seatle@foxmail.com>
* @created time :2016-09-18 10:17
*/
public static function letter_first($s0)
{
$firstchar_ord = ord(strtoupper($s0{0}));
if (($firstchar_ord >= 65 and $firstchar_ord <= 91) or ($firstchar_ord >= 48 and $firstchar_ord <= 57)) return $s0{0};
// $s = iconv("utf-8", "gbk//ignore", $s0);
$s = mb_convert_encoding($s0, "gbk", "utf-8");
$asc = ord($s{0}) * 256 + ord($s{1}) - 65536;
if ($asc >= -20319 and $asc <= -20284) return "A";
if ($asc >= -20283 and $asc <= -19776) return "B";
if ($asc >= -19775 and $asc <= -19219) return "C";
if ($asc >= -19218 and $asc <= -18711) return "D";
if ($asc >= -18710 and $asc <= -18527) return "E";
if ($asc >= -18526 and $asc <= -18240) return "F";
if ($asc >= -18239 and $asc <= -17923) return "G";
if ($asc >= -17922 and $asc <= -17418) return "H";
if ($asc >= -17417 and $asc <= -16475) return "J";
if ($asc >= -16474 and $asc <= -16213) return "K";
if ($asc >= -16212 and $asc <= -15641) return "L";
if ($asc >= -15640 and $asc <= -15166) return "M";
if ($asc >= -15165 and $asc <= -14923) return "N";
if ($asc >= -14922 and $asc <= -14915) return "O";
if ($asc >= -14914 and $asc <= -14631) return "P";
if ($asc >= -14630 and $asc <= -14150) return "Q";
if ($asc >= -14149 and $asc <= -14091) return "R";
if ($asc >= -14090 and $asc <= -13319) return "S";
if ($asc >= -13318 and $asc <= -12839) return "T";
if ($asc >= -12838 and $asc <= -12557) return "W";
if ($asc >= -12556 and $asc <= -11848) return "X";
if ($asc >= -11847 and $asc <= -11056) return "Y";
if ($asc >= -11055 and $asc <= -10247) return "Z";
return 0; // null
}
/**
* 获得某天前的时间戳
*
* @param mixed $day
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-09-18 10:17
*/
public static function getxtime($day)
{
$day = intval($day);
return mktime(23, 59, 59, date("m"), date("d") - $day, date("y"));
}
/**
* 读文件
*/
public static function get_file($url, $timeout = 10)
{
if (function_exists('curl_init'))
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
$content = curl_exec($ch);
curl_close($ch);
if ($content) return $content;
}
$ctx = stream_context_create(array('http' => array('timeout' => $timeout)));
$content = @file_get_contents($url, 0, $ctx);
if ($content) return $content;
return false;
}
/**
* 写文件,如果文件目录不存在,则递归生成
*/
public static function put_file($file, $content, $flag = 0)
{
$pathinfo = pathinfo($file);
if (!empty($pathinfo['dirname']))
{
if (file_exists($pathinfo['dirname']) === false)
{
if (@mkdir($pathinfo['dirname'], 0777, true) === false)
{
return false;
}
}
}
if ($flag === FILE_APPEND)
{
// 多个php-fpm写一个文件的时候容易丢失,要加锁
//return @file_put_contents($file, $content, FILE_APPEND|LOCK_EX);
return @file_put_contents($file, $content, FILE_APPEND);
}
else
{
return @file_put_contents($file, $content, LOCK_EX);
}
}
/**
* 检查路径是否存在,不存在则递归生成路径
*
* @param mixed $path 路径
* @static
* @access public
* @return bool or string
*/
public static function path_exists($path)
{
$pathinfo = pathinfo($path . '/tmp.txt');
if (!empty($pathinfo['dirname']))
{
if (file_exists($pathinfo['dirname']) === false)
{
if (mkdir($pathinfo['dirname'], 0777, true) === false)
{
return false;
}
}
}
return $path;
}
/**
* 递归删除目录
*
* @param mixed $dir
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-09-18 10:17
*/
public static function deldir($dir)
{
//先删除目录下的文件:
$dh = opendir($dir);
while ($file = readdir($dh))
{
if($file!="." && $file!="..")
{
$fullpath = $dir."/".$file;
if(!is_dir($fullpath))
{
unlink($fullpath);
}
else
{
self::deldir($fullpath);
}
}
}
closedir($dh);
//删除当前文件夹:
if(rmdir($dir))
{
return true;
}
else
{
return false;
}
}
/**
* 递归修改目录权限
*
* @param mixed $path 目录
* @param mixed $filemode 权限
* @return bool
*/
public static function chmodr($path, $filemode)
{
if (!is_dir($path))
{
return @chmod($path, $filemode);
}
$dh = opendir($path);
while (($file = readdir($dh)) !== false)
{
if ($file != '.' && $file != '..')
{
$fullpath = $path . '/' . $file;
if (is_link($fullpath))
{
return FALSE;
}
elseif (!is_dir($fullpath) && !@chmod($fullpath, $filemode))
{
return FALSE;
}
elseif (!self::chmodr($fullpath, $filemode))
{
return FALSE;
}
}
}
closedir($dh);
if (@chmod($path, $filemode))
{
return TRUE;
}
else
{
return FALSE;
}
}
/**
* 数组格式化为CSV
*
* @param mixed $data
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-07-29 11:32
*/
public static function format_csv($data)
{
foreach ($data as $k=>$v)
{
$v = str_replace(",", "", $v);
$v = str_replace("", "", $v);
$data[$k] = $v;
}
return implode(",", $data);
}
/**
* 判断是否为utf8字符串
* @parem $str
* @return bool
*/
public static function is_utf8($str)
{
if ($str === mb_convert_encoding(mb_convert_encoding($str, "UTF-32", "UTF-8"), "UTF-8", "UTF-32"))
{
return true;
}
else
{
return false;
}
}
/**
* 获取文件编码
* @param $string
* @return string
*/
public static function get_encoding($string)
{
$encoding = mb_detect_encoding($string, array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5'));
return strtolower($encoding);
}
/**
* 转换数组值的编码格式
* @param array $arr
* @param string $toEncoding
* @param string $fromEncoding
* @return array
*/
public static function array_iconv($arr, $from_encoding, $to_encoding)
{
eval('$arr = '.iconv($from_encoding, $to_encoding.'//IGNORE', var_export($arr,TRUE)).';');
return $arr;
}
/**
* 从普通时间返回Linux时间截(strtotime中文处理版)
* @parem string $dtime
* @return int
*/
public static function cn_strtotime($dtime)
{
if (!preg_match("/[^0-9]/", $dtime))
{
return $dtime;
}
$dtime = trim($dtime);
$dt = Array(1970, 1, 1, 0, 0, 0);
$dtime = preg_replace("/[\r\n\t]|日|秒/", " ", $dtime);
$dtime = str_replace("", "-", $dtime);
$dtime = str_replace("", "-", $dtime);
$dtime = str_replace("", ":", $dtime);
$dtime = str_replace("", ":", $dtime);
$dtime = trim(preg_replace("/[ ]{1,}/", " ", $dtime));
$ds = explode(" ", $dtime);
$ymd = explode("-", $ds[0]);
if (!isset($ymd[1]))
{
$ymd = explode(".", $ds[0]);
}
if (isset($ymd[0]))
{
$dt[0] = $ymd[0];
}
if (isset($ymd[1])) $dt[1] = $ymd[1];
if (isset($ymd[2])) $dt[2] = $ymd[2];
if (strlen($dt[0]) == 2) $dt[0] = '20' . $dt[0];
if (isset($ds[1]))
{
$hms = explode(":", $ds[1]);
if (isset($hms[0])) $dt[3] = $hms[0];
if (isset($hms[1])) $dt[4] = $hms[1];
if (isset($hms[2])) $dt[5] = $hms[2];
}
foreach ($dt as $k => $v)
{
$v = preg_replace("/^0{1,}/", '', trim($v));
if ($v == '')
{
$dt[$k] = 0;
}
}
$mt = mktime($dt[3], $dt[4], $dt[5], $dt[1], $dt[2], $dt[0]);
if (!empty($mt))
{
return $mt;
}
else
{
return strtotime($dtime);
}
}
public static function cn_substr($string, $length = 80, $etc = '...', $count_words = true)
{
mb_internal_encoding("UTF-8");
if ($length == 0) return '';
if (strlen($string) <= $length) return $string;
preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);
if ($count_words)
{
$j = 0;
$wordscut = "";
for ($i = 0; $i < count($info[0]); $i ++)
{
$wordscut .= $info[0][$i];
if (ord($info[0][$i]) >= 128)
{
$j = $j + 2;
}
else
{
$j = $j + 1;
}
if ($j >= $length)
{
return $wordscut . $etc;
}
}
return join('', $info[0]);
}
return join("", array_slice($info[0], 0, $length)) . $etc;
}
/**
* 获取文件后缀名
*
* @param mixed $file_name 文件名
* @static
*
* @access public
* @return string
*/
public static function get_extension($file_name)
{
$ext = explode('.', $file_name);
$ext = array_pop($ext);
return strtolower($ext);
}
// 获取 Url 跳转后的真实地址
public static function getrealurl($url)
{
if (empty($url))
{
return $url;
}
$header = get_headers($url, 1);
if (empty($header[0]) || empty($header[1]))
{
return $url;
}
if (strpos($header[0], '301') || strpos($header[0], '302'))
{
if (empty($header['Location']))
{
return $url;
}
if (is_array($header['Location']))
{
return $header['Location'][count($header['Location']) - 1];
}
else
{
return $header['Location'];
}
}
else
{
return $url;
}
}
// 解压服务器用 Content-Encoding:gzip 压缩过的数据
public static function gzdecode($data)
{
$flags = ord(substr($data, 3, 1));
$headerlen = 10;
$extralen = 0;
$filenamelen = 0;
if ($flags & 4)
{
$extralen = unpack('v', substr($data, 10, 2));
$extralen = $extralen[1];
$headerlen += 2 + $extralen;
}
if ($flags & 8) // Filename
$headerlen = strpos($data, chr(0), $headerlen) + 1;
if ($flags & 16) // Comment
$headerlen = strpos($data, chr(0), $headerlen) + 1;
if ($flags & 2) // CRC at end of file
$headerlen += 2;
$unpacked = @gzinflate(substr($data, $headerlen));
if ($unpacked === FALSE) $unpacked = $data;
return $unpacked;
}
/**
* 数字金额转换为中文
* @param string|integer|float $num 目标数字
* @param boolean $sim 使用小写(默认)
* @return string
*/
public static function number2chinese($num, $sim = FALSE)
{
if (!is_numeric($num)) return '含有非数字非小数点字符!';
$char = $sim ? array('零', '一', '二', '三', '四', '五', '六', '七', '八', '九') : array('零', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖');
$unit = $sim ? array('', '十', '百', '千', '', '万', '亿', '兆') : array('', '拾', '佰', '仟', '', '萬', '億', '兆');
$retval = '';
$num = sprintf("%01.2f", $num);
list ($num, $dec) = explode('.', $num);
// 小数部分
if ($dec['0'] > 0)
{
$retval .= "{$char[$dec['0']]}";
}
if ($dec['1'] > 0)
{
$retval .= "{$char[$dec['1']]}";
}
// 整数部分
if ($num > 0)
{
$retval = "" . $retval;
$f = 1;
$str = strrev(intval($num));
for ($i = 0, $c = strlen($str); $i < $c; $i ++)
{
if ($str[$i] > 0)
{
$f = 0;
}
if ($f == 1 && $str[$i] == 0)
{
$out[$i] = "";
}
else
{
$out[$i] = $char[$str[$i]];
}
$out[$i] .= $str[$i] != '0' ? $unit[$i % 4] : '';
if ($i > 1 and $str[$i] + $str[$i - 1] == 0)
{
$out[$i] = '';
}
if ($i % 4 == 0)
{
$out[$i] .= $unit[4 + floor($i / 4)];
}
}
$retval = join('', array_reverse($out)) . $retval;
}
return $retval;
}
public static function colorize($str, $status = "info")
{
$out = "";
switch ($status)
{
case 'succ':
$out = "\033[32m"; // Blue
break;
case "error":
$out = "\033[31m"; // Red
break;
case "warn":
$out = "\033[33m"; // Yellow
break;
case "note":
$out = "\033[34m"; // Green
break;
case "debug":
$out = "\033[36m"; // Green
break;
default:
$out = "\033[0m"; // info
break;
}
return $out.$str."\033[0m";
}
public static function node_to_array($dom, $node)
{
if(!is_a( $dom, 'DOMDocument' ) || !is_a( $node, 'DOMNode' ))
{
return false;
}
$array = array();
// Discard empty nodes
$localName = trim( $node->localName );
if( empty($localName))
{
return false;
}
if( XML_TEXT_NODE == $node->nodeType )
{
return $node->nodeValue;
}
foreach ($node->attributes as $attr)
{
$array['@'.$attr->localName] = $attr->nodeValue;
}
foreach ($node->childNodes as $childNode)
{
if ( (isset($childNode->childNodes->length) && 1 == $childNode->childNodes->length) &&
XML_TEXT_NODE == $childNode->firstChild->nodeType )
{
$array[$childNode->localName] = $childNode->nodeValue;
}
else
{
if( false !== ($a = self::node_to_array( $dom, $childNode)))
{
$array[$childNode->localName] = $a;
}
}
}
return $array;
}
public static function is_win()
{
return strtoupper(substr(PHP_OS,0,3))==="WIN";
}
/**
* 和 http_build_query 相反,分解出参数
*
* @return void
* @author seatle <seatle@foxmail.com>
* @created time :2016-05-16 17:29
*/
public static function http_split_query($query, $is_query = false)
{
if (!$is_query)
{
$parse_arr = parse_url($query);
if (empty($parse_arr['query']))
{
return array();
}
$query = $parse_arr['query'];
}
$query_arr = explode("&", $query);
$params = array();
foreach ($query_arr as $val)
{
$arr = explode("=", $val);
$params[$arr[0]] = $arr[1];
}
return $params;
}
}