<?php
/**
 * Scrapes a paginated project listing over HTTP POST and writes the collected
 * (code, name) pairs to a CSV file in the current working directory.
 */

header("content-type:text/html;charset=utf-8");
// Raise the execution time limit to 300 seconds; the crawl issues one HTTP request per page.
ini_set('max_execution_time', 300);

/**
 * Fetch pages $start .. $end-1 from the listing endpoint and dump them to
 * "output-{$start}-{$end-1}.csv", one "code,name" row per distinct code.
 *
 * Pages that cannot be fetched or decoded are logged via error_log() and
 * skipped; the final record-count check then reports the shortfall.
 * The function echoes a status message and calls exit when the number of
 * collected records differs from ($end - $start) * $pageSize. It calls die()
 * when the CSV file cannot be opened, written, or closed.
 *
 * @param int $start    First page to request (inclusive).
 * @param int $end      Page to stop at (exclusive).
 * @param int $pageSize Records requested per page; also used for the expected-count check.
 *
 * @return void
 */
function getScholar($start, $end, $pageSize = 7)
{
    // NOTE(review): the endpoint is masked in this copy of the script; fill in the real URL before running.
    $url = "http://****.gov.cn*****";
    $result = array();

    for ($i = $start; $i < $end; $i++) {
        $post_data = array(
            "currentPage" => $i, // current page
            "pageSize" => $pageSize,
        );

        $ch = curl_init();
        if ($ch === false) {
            error_log("getScholar: curl_init failed for page {$i}");
            continue;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        // Return the response body from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Issue the request as a POST.
        curl_setopt($ch, CURLOPT_POST, 1);
        // Attach the POST variables.
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
        $output = curl_exec($ch);
        // Read the error text before the handle is closed.
        $curlError = ($output === false) ? curl_error($ch) : '';
        curl_close($ch);

        if ($output === false) {
            error_log("getScholar: request for page {$i} failed: {$curlError}");
            continue;
        }

        $output_json = json_decode($output);
        if (
            !is_object($output_json)
            || !isset($output_json->projectList)
            || !(is_array($output_json->projectList) || is_object($output_json->projectList))
        ) {
            error_log("getScholar: page {$i} did not return a JSON object with a projectList");
            continue;
        }

        foreach ($output_json->projectList as $v) {
            if (!is_object($v) || !isset($v->code) || !is_scalar($v->code)) {
                continue;
            }
            // Assign rather than append, so a code seen twice still yields a single two-column row.
            $result[$v->code] = array($v->code, isset($v->name) ? $v->name : null);
        }
    }

    $end_end = $end - 1;
    $filename = "output-{$start}-{$end_end}.csv";
    $fp = fopen($filename, 'w'); // open the output file
    if ($fp === false) {
        die("can't open {$filename}");
    }

    // Discard anything already sitting in the output buffer, but only when one
    // exists: ob_clean() raises a notice without an active buffer.
    if (ob_get_level() > 0) {
        ob_clean();
    }

    foreach ($result as $row) {
        // Write one array of fields as one CSV line.
        if (fputcsv($fp, $row) === false) {
            fclose($fp);
            die("can't write csv line");
        }
    }
    fclose($fp) or die("can't close {$filename}");

    // Loose comparison on purpose: callers may pass the page bounds as numeric strings.
    $expected = ($end - $start) * $pageSize;
    if (count($result) != $expected) {
        echo "数据出现错误";
        echo "<br/>";
        echo count($result);
        exit;
    }

    echo "数据抓取完成,共抓取到" . count($result) . "条记录";
}

// Fetch pages 3950 through 3999 (the end bound is exclusive).
getScholar(3950, 4000);
// 源码来源:http://blog.csdn.net/htmlgood/article/details/49558703
// curl站外抓取第二发
// 原文地址:http://www.cnblogs.com/spritphp/p/7681030.html