
Ian Noble - 2011-08-26 16:15:52 -
In reply to message 1 from Ian Noble
Amending the code in GoogleCrawler.class.php as follows appears to fix the problem:
// Parses the result page held in $this->content and appends one entry per
// result to $this->results. Each entry is an array with the keys 'url',
// 'title', 'description' and 'cache-url' (null when no cache link precedes the
// next result). Scanning starts at $npos, which must be set by the code above.
//
// Debug aid - uncomment to dump the raw page when the markers stop matching:
// echo '<pre>' . htmlspecialchars($this->content) . '</pre><br />';

// Markup that delimits each part of a result. The value each marker replaced
// (what the class matched before this amendment) is noted alongside it.
$resultMarker  = '<h2 class="r"><a class="l" href="'; // was: '<h2 class=r><a href="'
$snippetMarker = '<div class="std"><span class="s">'; // was: '<div class=std>'
$cacheMarker   = '<nobr><a href="';                   // was: '<nobr><a class=fl href="'

$pos = strpos($this->content, $resultMarker, $npos);
// Strict comparisons throughout: strpos() returns 0 for a match at the very
// start of the string, which a loose "!= false" would treat as "not found".
while ($pos !== false) {
    $curr = array();

    // --- URL: everything between the marker and the closing quote of href ---
    // Offsets are derived from the marker length so they cannot drift out of
    // sync with the marker text. Searching from the first URL character (not
    // one past it) also handles an empty href and never passes an offset
    // beyond the end of the string.
    $urlStart = $pos + strlen($resultMarker);
    $npos = strpos($this->content, '"', $urlStart);
    if ($npos === false) {
        break;
    }
    $curr['url'] = html_entity_decode(substr($this->content, $urlStart, $npos - $urlStart));

    // --- Title: text between the end of the <a ...> tag and its </a> ---
    $pos = strpos($this->content, '>', $npos);
    if ($pos === false) {
        break;
    }
    $npos = strpos($this->content, '</a>', $pos);
    if ($npos === false) {
        break;
    }
    $curr['title'] = substr($this->content, $pos + 1, $npos - $pos - 1);

    // --- Description: contents of the snippet <span> ---
    $pos = strpos($this->content, $snippetMarker, $npos);
    if ($pos === false) {
        break;
    }
    $descStart = $pos + strlen($snippetMarker);
    $npos = strpos($this->content, '</span>', $descStart);
    if ($npos === false) {
        break;
    }
    $curr['description'] = substr($this->content, $descStart, $npos - $descStart);

    // --- Cache URL (optional) ---
    // A cache link belongs to this result only if it appears before the start
    // of the next result; otherwise it is the next result's link.
    $pos = strpos($this->content, $cacheMarker, $npos);
    $tmppos = strpos($this->content, $resultMarker, $npos);
    $curr['cache-url'] = null;
    if ($pos !== false && ($tmppos === false || $pos < $tmppos)) {
        $cacheStart = $pos + strlen($cacheMarker);
        $cacheEnd = strpos($this->content, '"', $cacheStart);
        if ($cacheEnd !== false) {
            // Length is end - start; the previous "-14" was one too long and
            // left the closing quote on the end of the extracted URL.
            $curr['cache-url'] = html_entity_decode(
                substr($this->content, $cacheStart, $cacheEnd - $cacheStart)
            );
            $npos = $cacheEnd;
        }
    }

    // Continue with the next result (false ends the loop).
    $pos = $tmppos;
    $this->results[] = $curr;
}