PHP批量检查网站的sitemap是否存在

<?php
// Plain-text input file listing the sites to check, one site per line.
$webfile = "sitexml.txt";
$opensite = fopen($webfile, 'r');
if ($opensite === false) {
    // Without a usable handle the read loop further down cannot make
    // progress (feof() on a non-resource never reports end-of-file on
    // PHP 7 and throws on PHP 8), so stop with a clear message instead.
    exit("无法打开站点列表文件: " . $webfile);
}

/**
 * Request only the response headers of a URL and return them as raw text.
 *
 * The request is sent without a body (CURLOPT_NOBODY) using a fixed
 * browser-like User-Agent and Accept-Language header. Redirects are not
 * followed, so the returned text describes the first response only.
 *
 * @param string $url     Address to probe.
 * @param int    $timeout Upper bound, in seconds, for the whole transfer.
 *                        The connection phase is capped at 10 seconds or
 *                        $timeout, whichever is smaller.
 * @return string|false   Raw response headers, or false when the handle
 *                        cannot be created or the transfer fails.
 */
function curl($url, $timeout = 30) {
    // Browser identity sent with every probe.
    $browsers = array(
        "user_agent" => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
        "language" => "en-us,en;q=0.5"
    );

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        "User-Agent: {$browsers['user_agent']}",
        "Accept-Language: {$browsers['language']}"
    ));
    // The page content is not needed, only the status line and headers.
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    // Hand the result back to the caller instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Keep one unresponsive host from stalling the whole batch.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, min(10, (int) $timeout));
    curl_setopt($ch, CURLOPT_TIMEOUT, (int) $timeout);

    $output = curl_exec($ch);

    // Release the handle before returning. On PHP 8+ handles are objects
    // that are freed automatically and curl_close() is a no-op, so the
    // explicit close is only needed for the older resource-based handles.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    return $output;
}
/**
 * Extract the numeric status code from a raw HTTP response header block.
 *
 * Accepts any protocol version in the status line ("HTTP/1.0", "HTTP/1.1",
 * "HTTP/2", ...).
 *
 * @param mixed $headers Raw header text, or a non-string (e.g. false) when
 *                       the request failed.
 * @return int|null      Status code of the first status line found, or null
 *                       when there is none.
 */
function sitemap_http_status($headers) {
    if (!is_string($headers)) {
        return null;
    }
    if (preg_match('/^HTTP\/\d+(?:\.\d+)?\s+(\d{3})/m', $headers, $matches) !== 1) {
        return null;
    }
    return (int) $matches[1];
}

/**
 * Prefix the host part of a URL with "www.".
 *
 * The prefix is inserted after the scheme when one is present
 * ("http://example.com" becomes "http://www.example.com"); otherwise it is
 * placed at the very start of the string.
 *
 * @param string $url
 * @return string
 */
function sitemap_with_www($url) {
    return preg_replace('#^([a-z][a-z0-9+.-]*://)?#i', '$1www.', $url, 1);
}

// Status codes that are reported as "no sitemap".
$missing_statuses = array(400, 403, 404, 500, 501, 502, 503, 504, 505);

// Read the site list line by line; the is_resource() guard keeps the loop
// from spinning if the list file could not be opened.
while (is_resource($opensite) && ($line = fgets($opensite)) !== false) {
    // Strip line endings (including "\r" from CRLF files), surrounding
    // whitespace and a trailing slash so the sitemap path is appended cleanly.
    $onesite = rtrim(trim($line), '/');
    if ($onesite === '') {
        continue;
    }

    $url = $onesite . "/sitemap.xml";
    $safe_url = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo "[URL]: $safe_url<br>";

    // Fetch once and reuse the response for both display and status parsing.
    $response = curl($url);

    if (!is_string($response) || $response === '') {
        echo '<FONT color=#ff0000>' . "网站不能打开" . '</font>' . "<br>";
    } else {
        // Headers come from the remote server, so escape them before
        // writing them into the HTML report.
        echo htmlspecialchars($response, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        $status = sitemap_http_status($response);

        if ($status === 200) {
            echo $safe_url . "存在sitemap" . "<br>";
        } elseif ($status === 301) {
            // A permanent redirect is retried once against the "www." host.
            $url = sitemap_with_www($url);
            $safe_url = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            if (sitemap_http_status(curl($url)) === 200) {
                echo $safe_url . "存在sitemap" . "<br>";
            } else {
                echo '<FONT color=#ff0000>' . $safe_url . "没有sitemap" . '</font>';
            }
        } elseif (in_array($status, $missing_statuses, true)) {
            echo '<FONT color=#ff0000>' . $safe_url . "没有sitemap" . '</font>';
        }
    }

    echo "<br><br>";
}

if (is_resource($opensite)) {
    fclose($opensite);
}
?>

 

评论关闭