2020-01-14 19:17:29 +08:00

937 lines
27 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// +----------------------------------------------------------------------
// | PHPSpider [ A PHP Framework For Crawler ]
// +----------------------------------------------------------------------
// | Copyright (c) 2006-2014 https://doc.phpspider.org All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: Seatle Yang <seatle@foxmail.com>
// +----------------------------------------------------------------------
//----------------------------------
// PHPSpider实用函数集合类文件
//----------------------------------
namespace phpspider\core;
// 引入PATH_DATA
require_once __DIR__ . '/constants.php';
class util
{
/**
 * File-based lock keyed by name.
 *
 * The lock file holds the Unix timestamp at which it was written. A lock
 * younger than $lock_timeout seconds is reported as held; an older one is
 * deleted and replaced by a fresh lock.
 *
 * Typical usage:
 *
 *     if (!util::lock('statistics_offer'))
 *     {
 *         // this call wrote the lock file; do the work, then release
 *         util::unlock('statistics_offer');
 *     }
 *     else
 *     {
 *         echo "process has been locked\n";
 *     }
 *
 * @param mixed $lock_name    lock name, used as the base name of the lock file
 * @param int   $lock_timeout age in seconds after which an existing lock is considered stale
 * @return bool true when a live lock already exists, false when this call wrote a new lock
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-02-18 14:28
 */
public static function lock($lock_name, $lock_timeout = 600)
{
    $lock_file = PATH_DATA."/lock/{$lock_name}.lock";
    $locked_at = util::get_file($lock_file);
    if ($locked_at)
    {
        // Younger than the timeout: the owning process is assumed to be alive.
        if (time() - $locked_at < $lock_timeout)
        {
            return true;
        }
        unlink($lock_file);
    }
    util::put_file($lock_file, time());
    return false;
}
/**
 * Remove the lock file for the given lock name.
 *
 * Releasing a lock whose file does not exist is a silent no-op instead of
 * an unlink() warning.
 *
 * @param mixed $lock_name lock name, as passed to lock()
 * @return void
 */
public static function unlock($lock_name)
{
    $lock_file = PATH_DATA."/lock/{$lock_name}.lock";
    // lock() deletes stale lock files itself, so the file may already be gone.
    if (is_file($lock_file))
    {
        unlink($lock_file);
    }
}
/**
 * Format a duration given in seconds as a human-readable string.
 *
 * @param mixed $time   duration in seconds (any numeric value)
 * @param bool  $is_log true for the compact form "1d 2h 3m 4s",
 *                      false for the long form "1 days 2 hours 3 minutes"
 * @return string|false the formatted duration, or false when $time is not numeric
 */
public static function time2second($time, $is_log = true)
{
    if (!is_numeric($time))
    {
        return false;
    }
    $value = array(
        "days" => 0, "hours" => 0,
        "minutes" => 0, "seconds" => 0,
    );
    // Neither output format has a year field, so whole years stay inside the
    // day count rather than being split off and silently dropped.
    if ($time >= 86400)
    {
        $value["days"] = floor($time / 86400);
        $time = ($time % 86400);
    }
    if ($time >= 3600)
    {
        $value["hours"] = floor($time / 3600);
        $time = ($time % 3600);
    }
    if ($time >= 60)
    {
        $value["minutes"] = floor($time / 60);
        $time = ($time % 60);
    }
    $value["seconds"] = floor($time);
    if ($is_log)
    {
        return $value["days"] ."d ". $value["hours"] ."h ". $value["minutes"] ."m ".$value["seconds"]."s";
    }
    return $value["days"] ." days ". $value["hours"] ." hours ". $value["minutes"] ." minutes";
}
/**
 * List the dates between two days as 'Y-m-d' strings.
 *
 * Both bounds are parsed with strtotime() and the span is walked in steps
 * of $range seconds.
 *
 * @param string      $day_sta first day
 * @param string|bool $day_end last day; true means today
 * @param int         $range   step between entries, in seconds
 * @return array list of 'Y-m-d' strings
 */
public static function get_days($day_sta, $day_end = true, $range = 86400)
{
    if ($day_end === true)
    {
        $day_end = date('Y-m-d');
    }
    $days = array();
    foreach (range(strtotime($day_sta), strtotime($day_end), $range) as $timestamp)
    {
        $days[] = date('Y-m-d', $timestamp);
    }
    return $days;
}
/**
 * Count the lines in a file.
 *
 * @param mixed $filepath path of the file to read
 * @return int number of lines, or 0 when the file cannot be opened
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-03-31 21:54
 */
public static function get_file_line($filepath)
{
    $fp = fopen($filepath, 'r');
    if (!$fp)
    {
        return 0;
    }
    $line = 0;
    // Compare against false explicitly: an empty line, or a line holding just
    // "0", is falsy but still a line and must not end the loop.
    while (fgets($fp) !== false)
    {
        $line++;
    }
    fclose($fp);
    return $line;
}
/**
 * Compute the zero-padded shard number for a value.
 *
 * The value is lower-cased and hashed with sha1(); the last two hex digits
 * of the hash (0-255) are reduced modulo $table_num.
 *
 * @param mixed $item_value unique key to distribute
 * @param int   $table_num  number of tables
 * @return string shard number, left-padded with "0" to 3 digits when
 *                $table_num is greater than 100, otherwise to 2 digits
 * @author seatle <seatle@foxmail.com>
 * @created time :2015-10-22 23:25
 */
public static function get_table_num($item_value, $table_num = 100)
{
    // sha1() returns 40 hex characters; only the trailing two are used.
    $tail = substr(sha1(strtolower($item_value)), -2);
    $width = $table_num > 100 ? 3 : 2;
    return str_pad(hexdec($tail) % $table_num, $width, "0", STR_PAD_LEFT);
}
/**
 * Build the sharded table name for a value.
 *
 * The value is lower-cased and hashed with sha1(); the last two hex digits
 * of the hash (0-255) are reduced modulo $table_num and appended to the
 * table name after an underscore.
 *
 * @param mixed $table_name base table name
 * @param mixed $item_value unique key to distribute
 * @param int   $table_num  number of tables
 * @return string "<table_name>_<NN>", the suffix left-padded with "0" to 3
 *                digits when $table_num is greater than 100, otherwise to 2
 * @author seatle <seatle@foxmail.com>
 * @created time :2015-10-22 23:25
 */
public static function get_table_name($table_name, $item_value, $table_num = 100)
{
    // sha1() returns 40 hex characters; only the trailing two are used.
    $tail = substr(sha1(strtolower($item_value)), -2);
    $width = $table_num > 100 ? 3 : 2;
    $suffix = str_pad(hexdec($tail) % $table_num, $width, "0", STR_PAD_LEFT);
    return $table_name."_".$suffix;
}
/**
 * Current memory usage of the script, formatted by format_bytes().
 *
 * @return string
 */
public static function memory_get_usage()
{
    return self::format_bytes(memory_get_usage());
}
/**
 * Peak memory usage of the script, formatted by format_bytes().
 *
 * @return string
 */
public static function memory_get_peak_usage()
{
    return self::format_bytes(memory_get_peak_usage());
}
/**
 * Convert a byte count into a string with a unit suffix.
 *
 * The value is divided by 1024 until it drops below 1024 or the largest
 * unit is reached, then rounded to two decimals. Zero and values below one
 * are reported in bytes instead of going through log(), which previously
 * produced a division by zero for 0 and an out-of-range unit index.
 *
 * @param int|float $size size in bytes
 * @return string e.g. "1.5 kb"
 */
public static function format_bytes($size)
{
    $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');
    $last = count($unit) - 1;
    $i = 0;
    while ($size >= 1024 && $i < $last)
    {
        $size /= 1024;
        $i++;
    }
    return round($size, 2) . ' ' . $unit[$i];
}
/**
 * Estimate the size of an array.
 *
 * The estimate is the byte length of the array's print_r() dump after each
 * newline followed by spaces has been removed, formatted by format_bytes().
 *
 * @param mixed $arr array to measure
 * @return string
 */
public static function array_size($arr)
{
    $dump = print_r($arr, true);
    $dump = preg_replace("/\n +/", "", $dump);
    return self::format_bytes(strlen($dump));
}
/**
 * Generate a string of random decimal digits.
 *
 * @param int $num number of digits
 * @return string
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-09-18 10:17
 */
public static function rand_num($num = 7)
{
    $digits = "";
    $remaining = $num;
    while ($remaining > 0)
    {
        $digits .= mt_rand(0, 9);
        $remaining--;
    }
    return $digits;
}
/**
 * Generate a random string of lower-case letters and digits.
 *
 * @param int $num length of the string
 * @return string
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-09-18 10:17
 */
public static function rand_str($num = 10)
{
    $chars = 'abcdefghijklmnopqrstuvwxyz0123456789';
    // The upper bound is inclusive, so it must be the last valid index;
    // strlen($chars) itself selects nothing and shortens the result.
    $max = strlen($chars) - 1;
    $string = "";
    for ($i = 0; $i < $num; $i ++)
    {
        $string .= $chars[mt_rand(0, $max)];
    }
    return $string;
}
/**
 * Convert Chinese text to pinyin.
 *
 * The input is converted from UTF-8 to GBK and scanned byte by byte. A byte
 * above 0x80 is taken together with the following byte as one character and
 * looked up in the global $pinyins table; ASCII letters and digits are
 * copied through; everything else is dropped. The table is loaded on first
 * use from PATH_DATA/pinyin.dat, one entry per line: two key bytes, one
 * separator byte, then the pinyin.
 *
 * @param mixed $str     text to convert (UTF-8)
 * @param int   $ishead  0 appends the full pinyin of each character, any
 *                       other value only its first letter
 * @param int   $isclose 0 empties the global table after the conversion so
 *                       it is reloaded on the next call
 * @static
 * @access public
 * @return string input shorter than two bytes is returned as-is (after the
 *                GBK conversion and trim)
 */
public static function pinyin($str, $ishead = 0, $isclose = 1)
{
    global $pinyins;
    $str = trim(mb_convert_encoding($str, "gbk", "utf-8"));
    $slen = strlen($str);
    if ($slen < 2)
    {
        return $str;
    }
    // The global may never have been initialised, so check the type before count().
    if (!is_array($pinyins) || count($pinyins) == 0)
    {
        $pinyins = array();
        $fp = fopen(PATH_DATA . '/pinyin.dat', 'r');
        if ($fp !== false)
        {
            while (($line = fgets($fp)) !== false)
            {
                $line = trim($line);
                // A usable entry needs two key bytes, a separator and a value.
                if (strlen($line) < 4)
                {
                    continue;
                }
                $pinyins[substr($line, 0, 2)] = substr($line, 3);
            }
            fclose($fp);
        }
    }
    $restr = '';
    for ($i = 0; $i < $slen; $i ++)
    {
        if (ord($str[$i]) > 0x80)
        {
            // A lead byte with nothing after it is a truncated character.
            if ($i + 1 >= $slen)
            {
                break;
            }
            $c = $str[$i] . $str[$i + 1];
            $i ++;
            if (isset($pinyins[$c]))
            {
                $restr .= ($ishead == 0) ? $pinyins[$c] : $pinyins[$c][0];
            }
        }
        else if (preg_match("/[a-z0-9]/i", $str[$i]))
        {
            $restr .= $str[$i];
        }
    }
    if ($isclose == 0)
    {
        // unset() would only drop this function's alias of the global;
        // assigning through the alias actually releases the table.
        $pinyins = array();
    }
    return $restr;
}
/**
 * Return the index letter for the first character of a string.
 *
 * An ASCII letter or digit is returned unchanged. Otherwise the string is
 * converted from UTF-8 to GBK and the code of its first two bytes is mapped
 * to a letter A-Z through the range table below.
 *
 * @param mixed $s0 input string (UTF-8)
 * @return string|int the letter or digit, or 0 when the string is empty or
 *                    its first character falls outside the table
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-09-18 10:17
 */
public static function letter_first($s0)
{
    $s0 = (string) $s0;
    if ($s0 === '')
    {
        return 0;
    }
    $first = $s0[0];
    $firstchar_ord = ord(strtoupper($first));
    // 'A'-'Z' is 65-90 ('[' is 91 and must not pass), '0'-'9' is 48-57.
    if (($firstchar_ord >= 65 && $firstchar_ord <= 90) || ($firstchar_ord >= 48 && $firstchar_ord <= 57))
    {
        return $first;
    }
    $s = mb_convert_encoding($s0, "gbk", "utf-8");
    if (strlen($s) < 2)
    {
        return 0;
    }
    $asc = ord($s[0]) * 256 + ord($s[1]) - 65536;
    if ($asc < -20319)
    {
        return 0;
    }
    // Inclusive upper bound of each letter's range. The ranges are contiguous
    // and start at -20319, so the first bound not below $asc wins.
    $upper_bounds = array(
        array(-20284, "A"), array(-19776, "B"), array(-19219, "C"), array(-18711, "D"),
        array(-18527, "E"), array(-18240, "F"), array(-17923, "G"), array(-17418, "H"),
        array(-16475, "J"), array(-16213, "K"), array(-15641, "L"), array(-15166, "M"),
        array(-14923, "N"), array(-14915, "O"), array(-14631, "P"), array(-14150, "Q"),
        array(-14091, "R"), array(-13319, "S"), array(-12839, "T"), array(-12557, "W"),
        array(-11848, "X"), array(-11056, "Y"), array(-10247, "Z"),
    );
    foreach ($upper_bounds as $entry)
    {
        if ($asc <= $entry[0])
        {
            return $entry[1];
        }
    }
    return 0;
}
/**
 * Timestamp of 23:59:59 on the day $day days before today.
 *
 * @param mixed $day number of days to go back (converted with intval())
 * @return int Unix timestamp
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-09-18 10:17
 */
public static function getxtime($day)
{
    $day = intval($day);
    // Pass the four-digit year so the result does not depend on mktime()
    // guessing the century from a two-digit value.
    return mktime(23, 59, 59, (int) date("n"), (int) date("j") - $day, (int) date("Y"));
}
/**
 * Read a file or URL.
 *
 * cURL is tried first when the extension is available; if it yields no
 * content, file_get_contents() is tried with the same timeout.
 *
 * @param string $url     file path or URL
 * @param int    $timeout transfer timeout in seconds (the cURL connect
 *                        timeout is fixed at 10)
 * @return string|false the content, or false when nothing could be read or
 *                      the content is empty/falsy
 */
public static function get_file($url, $timeout = 10)
{
    if (function_exists('curl_init'))
    {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_HEADER, 0);
        curl_setopt($curl, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
        $body = curl_exec($curl);
        curl_close($curl);
        if ($body)
        {
            return $body;
        }
    }
    $context = stream_context_create(array('http' => array('timeout' => $timeout)));
    $body = @file_get_contents($url, 0, $context);
    return $body ? $body : false;
}
/**
 * Write a file, creating its directory recursively when missing.
 *
 * @param string $file    target path
 * @param mixed  $content data to write
 * @param int    $flag    FILE_APPEND to append; any other value overwrites
 * @return int|false bytes written, or false when the directory could not be
 *                   created or the write failed
 */
public static function put_file($file, $content, $flag = 0)
{
    $info = pathinfo($file);
    $needs_dir = !empty($info['dirname']) && file_exists($info['dirname']) === false;
    if ($needs_dir && @mkdir($info['dirname'], 0777, true) === false)
    {
        return false;
    }
    // Appends are written without LOCK_EX. The original author noted that
    // concurrent appends from several php-fpm workers can lose data and that
    // a lock is needed, but left the locked variant disabled.
    $mode = ($flag === FILE_APPEND) ? FILE_APPEND : LOCK_EX;
    return @file_put_contents($file, $content, $mode);
}
/**
 * Make sure a directory exists, creating it recursively when missing.
 *
 * @param mixed $path directory path
 * @static
 * @access public
 * @return bool|string $path as given, or false when the directory could not
 *                     be created
 */
public static function path_exists($path)
{
    // Appending a dummy file name makes pathinfo() report $path itself as
    // the directory component.
    $info = pathinfo($path . '/tmp.txt');
    if (empty($info['dirname']) || file_exists($info['dirname']) !== false)
    {
        return $path;
    }
    if (mkdir($info['dirname'], 0777, true) === false)
    {
        return false;
    }
    return $path;
}
/**
 * Delete a directory and everything inside it.
 *
 * @param mixed $dir directory to remove
 * @return bool true when the directory itself was removed, false when it
 *              could not be opened or removed
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-09-18 10:17
 */
public static function deldir($dir)
{
    $dh = opendir($dir);
    if ($dh === false)
    {
        return false;
    }
    // Compare against false explicitly: an entry named "0" is falsy and
    // would otherwise end the loop before the directory is empty.
    while (($file = readdir($dh)) !== false)
    {
        if ($file == "." || $file == "..")
        {
            continue;
        }
        $fullpath = $dir."/".$file;
        // is_dir() follows symlinks; remove the link itself instead of
        // descending into (and emptying) whatever it points to.
        if (is_link($fullpath) || !is_dir($fullpath))
        {
            unlink($fullpath);
        }
        else
        {
            self::deldir($fullpath);
        }
    }
    closedir($dh);
    return rmdir($dir);
}
/**
 * Change permissions of a path recursively.
 *
 * A non-directory is chmod'ed directly. For a directory every entry is
 * processed first and the directory itself last. A symbolic link found
 * inside a directory makes the whole call fail.
 *
 * @param mixed $path     file or directory
 * @param mixed $filemode permission bits, e.g. 0755
 * @return bool true only when every chmod succeeded
 */
public static function chmodr($path, $filemode)
{
    if (!is_dir($path))
    {
        return @chmod($path, $filemode);
    }
    $dh = opendir($path);
    if ($dh === false)
    {
        return false;
    }
    $ok = true;
    while (($file = readdir($dh)) !== false)
    {
        if ($file == '.' || $file == '..')
        {
            continue;
        }
        $fullpath = $path . '/' . $file;
        // The recursive call handles files and directories alike, so each
        // entry is chmod'ed exactly once.
        if (is_link($fullpath) || !self::chmodr($fullpath, $filemode))
        {
            $ok = false;
            break;
        }
    }
    // Always release the handle, including on the failure path.
    closedir($dh);
    return $ok && @chmod($path, $filemode);
}
/**
 * Join the values of an array into one comma-separated line.
 *
 * Commas inside the values are removed (not quoted) so they cannot be
 * mistaken for separators.
 *
 * NOTE(review): the previous version had a second str_replace() whose
 * search string was empty, which replaces nothing. It looks like a
 * character lost in transit (possibly a full-width comma) - confirm
 * against the upstream file before relying on that.
 *
 * @param mixed $data array of values
 * @return string
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-07-29 11:32
 */
public static function format_csv($data)
{
    foreach ($data as $k => $v)
    {
        $data[$k] = str_replace(",", "", $v);
    }
    return implode(",", $data);
}
/**
 * Check whether a string is valid UTF-8.
 *
 * The string is converted from UTF-8 to UTF-32 and back; it counts as
 * UTF-8 when the round trip reproduces it exactly.
 *
 * @param string $str
 * @return bool
 */
public static function is_utf8($str)
{
    $utf32 = mb_convert_encoding($str, "UTF-32", "UTF-8");
    $round_trip = mb_convert_encoding($utf32, "UTF-8", "UTF-32");
    return $str === $round_trip;
}
/**
 * Detect the encoding of a string.
 *
 * Candidates are tried through mb_detect_encoding() in the order UTF-8,
 * GBK, GB2312, LATIN1, ASCII, BIG5.
 *
 * @param string $string
 * @return string lower-cased encoding name
 */
public static function get_encoding($string)
{
    $candidates = array('UTF-8', 'GBK', 'GB2312', 'LATIN1', 'ASCII', 'BIG5');
    $detected = mb_detect_encoding($string, $candidates);
    return strtolower($detected);
}
/**
 * Convert the character encoding of every string in an array.
 *
 * String keys and string values are converted with iconv() at any nesting
 * depth; all other values are returned untouched. The conversion walks the
 * structure directly instead of eval()-ing re-encoded var_export() output,
 * which broke (and could execute unintended code) whenever a converted
 * byte happened to be a quote or a backslash.
 *
 * @param array|string $arr           data to convert
 * @param string       $from_encoding source encoding
 * @param string       $to_encoding   target encoding; characters that cannot
 *                                    be represented are dropped (//IGNORE)
 * @return array|string the converted data; a string that iconv() fails to
 *                      convert is kept as it was
 */
public static function array_iconv($arr, $from_encoding, $to_encoding)
{
    if (is_string($arr))
    {
        $converted = iconv($from_encoding, $to_encoding.'//IGNORE', $arr);
        return $converted === false ? $arr : $converted;
    }
    if (!is_array($arr))
    {
        return $arr;
    }
    $result = array();
    foreach ($arr as $key => $value)
    {
        if (is_string($key))
        {
            $key = self::array_iconv($key, $from_encoding, $to_encoding);
        }
        $result[$key] = self::array_iconv($value, $from_encoding, $to_encoding);
    }
    return $result;
}
/**
 * Parse a date/time string into a Unix timestamp, accepting Chinese date
 * markers (a strtotime() variant for Chinese input).
 *
 * Accepted forms include "2016年9月18日 10时17分30秒", "2016-09-18 10:17:30"
 * and "2016.9.18". A two-digit year is prefixed with "20". A string made of
 * digits only is returned unchanged.
 *
 * The year/month/hour/minute markers are restored here: the previous code
 * replaced empty strings, which does nothing. The day and second markers
 * were already handled by the regular expression, and the "-" / ":"
 * replacements fix which markers belong where - confirm against upstream.
 *
 * @param string $dtime
 * @return int|string|false the timestamp; the input itself when it contains
 *                          digits only; strtotime()'s result as a fallback
 */
public static function cn_strtotime($dtime)
{
    if (!preg_match("/[^0-9]/", $dtime))
    {
        return $dtime;
    }
    $dtime = trim($dtime);
    $dtime = preg_replace("/[\r\n\t]|日|秒/", " ", $dtime);
    $dtime = str_replace(array("年", "月"), "-", $dtime);
    $dtime = str_replace(array("时", "分"), ":", $dtime);
    $dtime = trim(preg_replace("/[ ]{1,}/", " ", $dtime));
    $ds = explode(" ", $dtime);
    $ymd = explode("-", $ds[0]);
    if (!isset($ymd[1]))
    {
        $ymd = explode(".", $ds[0]);
    }
    // year, month, day, hour, minute, second
    $dt = array(1970, 1, 1, 0, 0, 0);
    for ($k = 0; $k < 3; $k ++)
    {
        if (isset($ymd[$k])) $dt[$k] = $ymd[$k];
    }
    if (strlen($dt[0]) == 2) $dt[0] = '20' . $dt[0];
    if (isset($ds[1]))
    {
        $hms = explode(":", $ds[1]);
        for ($k = 0; $k < 3; $k ++)
        {
            if (isset($hms[$k])) $dt[3 + $k] = $hms[$k];
        }
    }
    foreach ($dt as $k => $v)
    {
        $v = trim((string) $v);
        if ($v === '')
        {
            $dt[$k] = 0;
        }
        elseif (!ctype_digit($v))
        {
            // mktime() rejects non-numeric arguments (a TypeError on PHP 8),
            // so hand anything else to strtotime() directly.
            return strtotime($dtime);
        }
        else
        {
            $dt[$k] = (int) $v;
        }
    }
    $mt = mktime($dt[3], $dt[4], $dt[5], $dt[1], $dt[2], $dt[0]);
    if (!empty($mt))
    {
        return $mt;
    }
    return strtotime($dtime);
}
/**
 * Truncate a UTF-8 string and append a marker when something was cut off.
 *
 * The string is split into UTF-8 characters; bytes that do not form a valid
 * character are dropped. With $count_words enabled a multi-byte character
 * counts as 2 and a single-byte character as 1 towards $length; otherwise
 * every character counts as 1. $etc is appended only when characters were
 * actually left out.
 *
 * @param string $string      text to shorten
 * @param int    $length      maximum width; 0 yields an empty string
 * @param string $etc         marker appended to a truncated result
 * @param bool   $count_words count multi-byte characters as width 2
 * @return string
 */
public static function cn_substr($string, $length = 80, $etc = '...', $count_words = true)
{
    // Not needed by the code below; kept because callers may rely on the
    // internal encoding having been switched by this call.
    mb_internal_encoding("UTF-8");
    if ($length == 0) return '';
    if (strlen($string) <= $length) return $string;
    preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);
    $chars = $info[0];
    $total = count($chars);
    if (!$count_words)
    {
        $cut = join("", array_slice($chars, 0, $length));
        return $total > $length ? $cut . $etc : $cut;
    }
    $width = 0;
    $cut = "";
    foreach ($chars as $i => $char)
    {
        $cut .= $char;
        $width += (ord($char) >= 128) ? 2 : 1;
        if ($width >= $length)
        {
            // Reaching the limit exactly on the last character cuts nothing.
            return ($i < $total - 1) ? $cut . $etc : $cut;
        }
    }
    return $cut;
}
/**
 * Get the lower-cased extension of a file name.
 *
 * The extension is whatever follows the last dot; a name without a dot is
 * returned lower-cased as a whole.
 *
 * @param mixed $file_name file name
 * @static
 *
 * @access public
 * @return string
 */
public static function get_extension($file_name)
{
    $segments = explode('.', $file_name);
    $last = $segments[count($segments) - 1];
    return strtolower($last);
}
/**
 * Resolve the address a URL redirects to.
 *
 * The headers are fetched with get_headers(). When the first status line
 * mentions 301 or 302 and a Location header is present, that location is
 * returned (the last one when there are several); in every other case the
 * URL is returned unchanged.
 *
 * @param string $url
 * @return string
 */
public static function getrealurl($url)
{
    if (empty($url))
    {
        return $url;
    }
    $headers = get_headers($url, 1);
    if (empty($headers[0]) || empty($headers[1]))
    {
        return $url;
    }
    $redirected = strpos($headers[0], '301') || strpos($headers[0], '302');
    if (!$redirected || empty($headers['Location']))
    {
        return $url;
    }
    $location = $headers['Location'];
    return is_array($location) ? $location[count($location) - 1] : $location;
}
/**
 * Decompress data that a server sent with "Content-Encoding: gzip".
 *
 * The gzip header is skipped according to its flag byte and the remainder
 * is inflated. Data that does not start with the gzip magic bytes, whose
 * header is truncated, or that cannot be inflated is returned unchanged.
 *
 * @param string $data possibly gzip-compressed data
 * @return string the decompressed data, or $data itself when it is not
 *                decodable
 */
public static function gzdecode($data)
{
    $len = strlen($data);
    // The fixed gzip header is 10 bytes and starts with 1f 8b.
    if ($len < 10 || substr($data, 0, 2) !== "\x1f\x8b")
    {
        return $data;
    }
    $flags = ord($data[3]);
    $headerlen = 10;
    if ($flags & 4) // Extra field: 2-byte little-endian length, then payload
    {
        if ($len < 12)
        {
            return $data;
        }
        $extralen = unpack('v', substr($data, 10, 2));
        $headerlen += 2 + $extralen[1];
    }
    // Filename (8) and comment (16) are zero-terminated strings.
    foreach (array(8, 16) as $flag)
    {
        if ($flags & $flag)
        {
            if ($headerlen >= $len)
            {
                return $data;
            }
            $end = strpos($data, chr(0), $headerlen);
            if ($end === false)
            {
                return $data;
            }
            $headerlen = $end + 1;
        }
    }
    if ($flags & 2) // Header CRC
    {
        $headerlen += 2;
    }
    if ($headerlen >= $len)
    {
        return $data;
    }
    $unpacked = @gzinflate(substr($data, $headerlen));
    return $unpacked === FALSE ? $data : $unpacked;
}
/**
 * Convert a monetary amount to Chinese numerals.
 *
 * The amount is rounded to two decimals. The integer part is written with
 * place-value units and followed by 元; the two decimal digits are followed
 * by 角 and 分.
 *
 * NOTE(review): the three unit suffixes are restored here. The previous
 * code concatenated empty strings at exactly these places, so the integer
 * and decimal digits ran together; the characters were evidently lost.
 * Confirm against the upstream file.
 *
 * @param string|integer|float $num amount to convert
 * @param boolean $sim true for the simple numerals (一, 二, ...), false
 *                     (default) for the formal ones (壹, 贰, ...)
 * @return string the amount in Chinese, or an error message when $num is
 *                not numeric
 */
public static function number2chinese($num, $sim = FALSE)
{
    if (!is_numeric($num)) return '含有非数字非小数点字符!';
    $char = $sim ? array('零', '一', '二', '三', '四', '五', '六', '七', '八', '九') : array('零', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖');
    $unit = $sim ? array('', '十', '百', '千', '', '万', '亿', '兆') : array('', '拾', '佰', '仟', '', '萬', '億', '兆');
    $retval = '';
    $num = sprintf("%01.2f", $num);
    list ($num, $dec) = explode('.', $num);
    // Decimal part: tenths, then hundredths.
    if ($dec[0] > 0)
    {
        $retval .= $char[$dec[0]] . '角';
    }
    if ($dec[1] > 0)
    {
        $retval .= $char[$dec[1]] . '分';
    }
    // Integer part, processed from the lowest digit upwards.
    if ($num > 0)
    {
        $retval = '元' . $retval;
        $out = array();
        // $f stays 1 while only trailing zeros have been seen.
        $f = 1;
        $str = strrev((string) intval($num));
        for ($i = 0, $c = strlen($str); $i < $c; $i ++)
        {
            if ($str[$i] > 0)
            {
                $f = 0;
            }
            if ($f == 1 && $str[$i] == 0)
            {
                $out[$i] = "";
            }
            else
            {
                $out[$i] = $char[$str[$i]];
            }
            $out[$i] .= $str[$i] != '0' ? $unit[$i % 4] : '';
            // Collapse a run of zeros into a single one.
            if ($i > 1 and $str[$i] + $str[$i - 1] == 0)
            {
                $out[$i] = '';
            }
            // Every fourth digit starts a new group and carries its unit.
            if ($i % 4 == 0)
            {
                $out[$i] .= $unit[4 + (int) floor($i / 4)];
            }
        }
        $retval = join('', array_reverse($out)) . $retval;
    }
    return $retval;
}
/**
 * Wrap a string in ANSI colour escape sequences for terminal output.
 *
 * @param string $str    text to colour
 * @param string $status one of 'succ', 'error', 'warn', 'note', 'debug';
 *                       anything else uses the reset sequence as prefix
 * @return string the text, prefixed by the colour code and followed by a reset
 */
public static function colorize($str, $status = "info")
{
    if ($status == 'succ')
    {
        $prefix = "\033[32m"; // green
    }
    elseif ($status == "error")
    {
        $prefix = "\033[31m"; // red
    }
    elseif ($status == "warn")
    {
        $prefix = "\033[33m"; // yellow
    }
    elseif ($status == "note")
    {
        $prefix = "\033[34m"; // blue
    }
    elseif ($status == "debug")
    {
        $prefix = "\033[36m"; // cyan
    }
    else
    {
        $prefix = "\033[0m"; // info: no colour
    }
    return $prefix.$str."\033[0m";
}
/**
 * Convert a DOM node into a nested array.
 *
 * Attributes are stored under '@<name>' keys. A child whose only child is a
 * text node is stored as that text; any other child is converted
 * recursively. Children are keyed by their local name, so a later child
 * overwrites an earlier one with the same name.
 *
 * @param mixed $dom  must be a DOMDocument (only its type is checked)
 * @param mixed $node must be a DOMNode
 * @return array|string|false false for wrong argument types or a node
 *                            without a local name
 */
public static function node_to_array($dom, $node)
{
    if (!($dom instanceof \DOMDocument) || !($node instanceof \DOMNode))
    {
        return false;
    }
    // Discard nodes that have no local name.
    $localName = trim( $node->localName );
    if (empty($localName))
    {
        return false;
    }
    if (XML_TEXT_NODE == $node->nodeType)
    {
        return $node->nodeValue;
    }
    $result = array();
    foreach ($node->attributes as $attr)
    {
        $result['@'.$attr->localName] = $attr->nodeValue;
    }
    foreach ($node->childNodes as $child)
    {
        $is_text_leaf = (isset($child->childNodes->length) && 1 == $child->childNodes->length)
            && XML_TEXT_NODE == $child->firstChild->nodeType;
        if ($is_text_leaf)
        {
            $result[$child->localName] = $child->nodeValue;
            continue;
        }
        $converted = self::node_to_array($dom, $child);
        if (false !== $converted)
        {
            $result[$child->localName] = $converted;
        }
    }
    return $result;
}
/**
 * Tell whether PHP is running on Windows.
 *
 * @return bool true when PHP_OS starts with "WIN" (case-insensitive)
 */
public static function is_win()
{
    $os_prefix = substr(PHP_OS, 0, 3);
    return strtoupper($os_prefix) === "WIN";
}
/**
 * Split a query string into its parameters (the reverse of
 * http_build_query()).
 *
 * Names and values are returned exactly as they appear; nothing is
 * URL-decoded. Each pair is split at its first "=" only, so a value may
 * itself contain "=". A parameter without "=" gets an empty string as
 * value, and empty segments (as in "a=1&&b=2") are skipped.
 *
 * @param string $query    a full URL, or a bare query string when $is_query is true
 * @param bool   $is_query true when $query already is the query string
 * @return array parameter name => value; empty when the URL has no query
 * @author seatle <seatle@foxmail.com>
 * @created time :2016-05-16 17:29
 */
public static function http_split_query($query, $is_query = false)
{
    if (!$is_query)
    {
        $parse_arr = parse_url($query);
        if (empty($parse_arr['query']))
        {
            return array();
        }
        $query = $parse_arr['query'];
    }
    $params = array();
    foreach (explode("&", $query) as $pair)
    {
        if ($pair === '')
        {
            continue;
        }
        $parts = explode("=", $pair, 2);
        $params[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
    }
    return $params;
}
}