使用 fsockopen() 函数获取页面信息,完整代码如下:
// Fetch a single page over a raw TCP socket with fsockopen() and echo the
// complete HTTP response (status line, headers and body) as received.

// Declare the output charset. The NetEase (163.com) page being fetched is
// GBK-encoded, and the response bytes are echoed without any transcoding.
header("content-type:text/html;charset=gb2312");

// Use the China time zone.
date_default_timezone_set('PRC');

// Host to connect to; also sent as the Host request header.
$hostname = "news.163.com"; // alternative: "www.163.com"

// HTTP request method.
$method = 'GET';

// Request path (URI) of the specific article to fetch.
$target = '/17/1225/09/D6G89EED000189FH.html';

// Query string appended to the path, e.g. "?key1=value1&key2=value2".
$getValues = '';

// TCP port of the remote server.
$port = 80;

// Open the connection with a 30-second connect timeout. On failure the
// handle is falsy and $errno / $errstr describe the error.
$fp = fsockopen($hostname, $port, $errno, $errstr, 30);

if (!$fp) {
    echo "####Failed!$errstr ($errno)<br/>\n";
} else {
    // Request line: mind the single spaces between method, target and version.
    $out = "$method $target$getValues HTTP/1.1\r\n"; // originally: "GET / HTTP/1.1\r\n"

    // Optional headers, intentionally left disabled:
    //$out .= "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\n";
    //$out .= "Accept-Encoding: gzip, deflate\r\n";

    // Note: every header needs a space after the colon.
    $out .= "Accept-Language: zh-CN,zh;q=0.9\r\n";
    $out .= "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36\r\n";
    $out .= "Host: $hostname\r\n";
    $out .= "Cache-Control: max-age=0\r\n";
    // The blank line (second \r\n) terminates the header section.
    $out .= "Connection: Close\r\n\r\n";

    fwrite($fp, $out);

    // Read the response in chunks of up to 127 bytes. Looping on the fgets()
    // result (rather than on feof() alone) guarantees termination when a read
    // fails or times out before EOF is flagged.
    while (($line = fgets($fp, 128)) !== false) {
        echo $line;
    }

    fclose($fp);
}
根据以上示例,通过修改端口并设置相应的请求头信息,即可实现对任意端口的访问。