计算Google PageRank的php代码

Posted by 冰河 at 21:52 Add comments 13,529 Views
十二月 18, 2009

可以方便调用。这段代码在windows和linux下都能用。

<?php
/**
 * Builds the checksum ("ch" parameter) expected by the Google Toolbar
 * PageRank query and fetches the rank for a URL over a plain HTTP socket.
 *
 * Usage: $rank = (new PageRank())->getrank('http://www.example.com/');
 */
class PageRank
{
    /** Host the toolbar query is sent to (also used for the Host header). */
    public $googlehost = 'www.google.com';

    /** User-Agent header sent with the toolbar query. */
    public $googleua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5';

    /**
     * Fold a string into a number, keeping the running value inside 32 bits.
     *
     * For every byte: multiply the running value by $Magic, reduce it
     * modulo 2^32 if it reached 2^32, then add the byte's ordinal.
     *
     * @param string    $Str   Input string, processed byte by byte.
     * @param int|float $Check Initial value of the running hash.
     * @param int       $Magic Multiplier applied before each byte is added.
     * @return int|float The folded value. Because the byte is added after
     *                   the reduction, it can exceed 2^32 - 1 by up to 255.
     */
    public function StrToNum($Str, $Check, $Magic)
    {
        $Int32Unit = 4294967296; // 2^32
        $length = strlen($Str);
        for ($i = 0; $i < $length; $i++) {
            $Check *= $Magic;
            // A float outside the native integer range converts to int
            // unpredictably, so reduce arithmetically instead of casting.
            // See http://www.php.net/manual/en/language.types.integer.php
            if ($Check >= $Int32Unit) {
                $Check = ($Check - $Int32Unit * (int) ($Check / $Int32Unit));
                // If the reduced value fell below -2^31, shift it back up.
                $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
            }
            $Check += ord($Str[$i]);
        }
        return $Check;
    }

    /**
     * Generate the numeric hash for a URL.
     *
     * Two StrToNum passes (seed 0x1505 / multiplier 0x21, and seed 0 /
     * multiplier 0x1003F) are combined by the bit shuffling below.
     *
     * @param string $String URL text to hash.
     * @return int Combined hash value.
     */
    public function HashURL($String)
    {
        $Check1 = $this->StrToNum($String, 0x1505, 0x21);
        $Check2 = $this->StrToNum($String, 0, 0x1003F);

        $Check1 >>= 2;
        $Check1 = (($Check1 >> 4) & 0x3FFFFC0) | ($Check1 & 0x3F);
        $Check1 = (($Check1 >> 4) & 0x3FFC00) | ($Check1 & 0x3FF);
        $Check1 = (($Check1 >> 4) & 0x3C000) | ($Check1 & 0x3FFF);

        $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) << 2) | ($Check2 & 0xF0F);
        $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000);

        return ($T1 | $T2);
    }

    /**
     * Generate the checksum string for a hash number.
     *
     * Walks the decimal digits of the hash from right to left; every second
     * digit is doubled and its own digits summed before being accumulated.
     * The check digit derived from that sum is placed between a literal '7'
     * and the decimal hash.
     *
     * @param int|float $Hashnum Hash as returned by HashURL().
     * @return string '7' . check digit . unsigned decimal hash.
     */
    public function CheckHash($Hashnum)
    {
        $CheckByte = 0;
        $Flag = 0;
        $HashStr = sprintf('%u', $Hashnum);
        $length = strlen($HashStr);

        for ($i = $length - 1; $i >= 0; $i--) {
            $Re = (int) $HashStr[$i];
            if (1 === ($Flag % 2)) {
                // Double the digit, then add the two digits of the result.
                $Re += $Re;
                $Re = (int) ($Re / 10) + ($Re % 10);
            }
            $CheckByte += $Re;
            $Flag++;
        }

        $CheckByte %= 10;
        if (0 !== $CheckByte) {
            $CheckByte = 10 - $CheckByte;
            // $Flag now equals the digit count; odd-length hashes get the
            // extra adjustment below.
            if (1 === ($Flag % 2)) {
                if (1 === ($CheckByte % 2)) {
                    $CheckByte += 9;
                }
                $CheckByte >>= 1;
            }
        }

        return '7' . $CheckByte . $HashStr;
    }

    /**
     * Return the PageRank checksum hash for a URL.
     *
     * @param string $url URL text (getrank() passes it without the scheme).
     * @return string Value for the "ch" query parameter.
     */
    public function getch($url)
    {
        return $this->CheckHash($this->HashURL($url));
    }

    /**
     * Query the toolbar endpoint and return the PageRank figure for a URL.
     *
     * @param string $url URL to look up, with or without a scheme.
     * @return string|int Text following the "Rank_x:x:" marker in the
     *                    response, or -1 if the connection failed or no
     *                    rank line was found.
     */
    public function getrank($url)
    {
        // Strip a leading "scheme://" if present; parse_url() may return
        // false or omit the 'scheme' key, so check before reading it.
        $urlinfo = parse_url($url);
        $start = 0;
        if (is_array($urlinfo) && isset($urlinfo['scheme']) && $urlinfo['scheme'] != '') {
            $start = strlen($urlinfo['scheme'] . '://');
        }
        $url = (string) substr($url, $start);

        $pr = -1; // default return
        $ch = $this->getch($url);

        $fp = fsockopen($this->googlehost, 80, $errno, $errstr, 30);
        if (!$fp) {
            return $pr;
        }

        // The URL is percent-encoded so that spaces or CR/LF in the input
        // cannot break or inject into the request line. HTTP requires CRLF
        // line endings and a blank line after the headers.
        $out = 'GET /search?client=navclient-auto&ch=' . $ch
            . '&features=Rank&q=info:' . rawurlencode($url) . " HTTP/1.1\r\n";
        $out .= "User-Agent: {$this->googleua}\r\n";
        $out .= "Host: {$this->googlehost}\r\n";
        $out .= "Connection: Close\r\n\r\n";
        fwrite($fp, $out);

        // Read whole lines so the "Rank_" marker is never split across reads.
        while (!feof($fp)) {
            $data = fgets($fp);
            if ($data === false) {
                break;
            }
            $pos = strpos($data, 'Rank_');
            if ($pos !== false) {
                // Skip the 9-character "Rank_x:x:" prefix.
                $pr = trim((string) substr($data, $pos + 9));
                break;
            }
        }

        // Always release the socket, including when a rank was found.
        fclose($fp);

        return $pr;
    }
}
//$gpr = new PageRank();
//echo $gpr->getrank("http://www.baidu.com/");
?>

相关日志

Leave a Reply

(required)

(required)

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

Protected by WP Anti Spam
© 2009 - 2024 冰河的博客