Foros del Web » Programando para Internet » PHP »

Funcion para calcular el tamaño de las paginas 1

Estas en el tema de Funcion para calcular el tamaño de las paginas 1 en el foro de PHP en Foros del Web. Hola, encontre una clase que se supone que te calcula el tamaño de las paginas. Aqui teneis el código: (Fichero class.webpagesize.php) <? class WebpageSize { ...
  #1 (permalink)  
Antiguo 20/02/2007, 04:31
 
Fecha de Ingreso: febrero-2007
Mensajes: 2
Antigüedad: 13 años, 5 meses
Puntos: 0
Funcion para calcular el tamaño de las paginas 1

Hola, encontre una clase que se supone que te calcula el tamaño de las paginas. Aqui teneis el código:
(Fichero class.webpagesize.php)

<?
class WebpageSize {
var $url = '';
var $baseurl = '';
var $tailfile = '';
var $proxy = '';
var $proxyport = 3128;
var $pages = array();
var $freqpages = array(); //frequency of page element to be loaded

function setURL($url) {
$this->url = $this->parseURL($url);
}

function parseURL($url) {

$this->tailfile = substr($url, strrpos($url, '/')+1);
$parsed = parse_url($url);
if($this->tailfile == $parsed['host']) $this->tailfile = '';
if(substr($url, -1)=='/' or $this->tailfile)
return $url;
else
$url = $url.'/';
return $url;
}

function setBaseURL($str) {
preg_match("/base.*[\s]*href[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $match);

if($match[1]) {
$url = $this->parseURL($match[1]);
if(substr($url, -1)!='/') $url .= '/';
$this->baseurl = $url;
} else {
$this->baseurl = $this->url;
}

}



function setURLviaProxy($url,$proxy,$port) {
$this->setURL($url);
$this->proxy = $proxy;
$this->proxyport = $port;

}


function getResult() {

$paths = $this->grabPageSources();
array_unshift ($paths, $this->url);
$pages = array();



if(function_exists('curl_init')) {

$ch = curl_init();
if($this->proxy) {
curl_setopt($ch, CURLOPT_PROXY, $this->proxy.':'.$this->proxyport);
}
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

for($i=0; $i<count($paths); $i++){

if(!array_key_exists($paths[$i],$pages) ){
curl_setopt($ch, CURLOPT_URL, $paths[$i]);
$headers = curl_exec ($ch);
$filesize = curl_getinfo($ch,CURLINFO_CONTENT_LENGTH_DOWNLOAD) ;
if(!$filesize) //try using fopen
$filesize = strlen($this->getContent($paths[$i]));
if(!$filesize)
continue;
$this->freqpages[$paths[$i]] = 1;
$pages[$paths[$i]] = $filesize;
$this->totalsize += $filesize;
} else
$this->freqpages[$paths[$i]] += 1;
}
curl_close ($ch);
} else {

for($i=0; $i<count($paths); $i++){
if(!array_key_exists($paths[$i],$pages) ){
$filesize = strlen($this->getContent($paths[$i]));
$this->freqpages[$paths[$i]] = 1;
$pages[$paths[$i]] = $filesize;
$this->totalsize += $filesize;
} else
$this->freqpages[$paths[$i]] += 1;
}

}

natsort($pages);

return $pages;
}

function totalPageSize() {
return $this->totalsize;
}

/*
* this one is usefull
*/
function readableSize($size) {
return number_format($size/1024,2)." KB";
}

/*
* pre-formated output
*/
function printResult() {
$pages = $this->getResult();
$strtable = '<table width=\"700\" border=\"1\">' .
'<tr bgcolor=#F3F3F3><td width=\"360\" colspan=2>' .
'<div align=\"center\">Webpage`s URL : '.$this->url.'</div></td>' .
'<td width=\"140\" colspan=2>Size : ' . $this->readableSize($this->totalPageSize()) . '</td></tr>' .
'<tr bgcolor=#F3F3F3><td width=\"24\"><div align=\"center\">#</div></td>' .
'<td width=\"210\">URL of Elements of Webpage</td>' .
'<td width=\"86\">Filesize</td>' .
'<td width=\"32\">Freq</td>' .
'</tr>';
$n=0;
while(list($url,$size) = each($pages)){
$strtable .= '<tr><td width=20>'.++$n.'</td><td width=440>'. $url. '</td>' .
'<td width=100>'. $this->readableSize($size) . '</td>' .
'<td width=40>' . $this->freqpages[$url] . '</td></tr>';
}
$strtable .= '<tr bgcolor=#F3F3F3><td>&nbsp;</td><td> Total Webpage Size</td><td colspan=2>'.
$this->readableSize($this->totalPageSize()) . '</td></tr>';
$strtable .='</table>';
echo $strtable;
}


function getContent($url){
if($this->proxy) {
return $this->getContentProxy($url);
} else {
$file = @fopen($url, 'rb');
$buffer = '';
if(!$file) return '';
while(!feof($file)) {
$buffer .= fread($file,1024);
}
fclose($file);
return $buffer;
}
}


function getContentProxy($url)
{
$buffer = '';

$file = fsockopen($this->proxy, $this->proxyport);
if (!$file) {return '';}
fputs($file, "GET $url HTTP/1.0\r\nHost: $proxy_name\r\n\r\n");
while(!feof($file)) {$buffer .= fread($file,4096);}
fclose($file);
$buffer = substr($buffer, strpos($buffer,"\r\n\r\n")+4);
return $buffer;
}


function grabPageSources() {

$content = $this->getContent($this->url);
$this->setBaseURL($content);

$arr_src1 = array();
$arr_src2 = array();
$arr_src3 = array();
$arr_src4 = array();
$arr_src5 = array();
$arr_src6 = array();

$arr_src1 = $this->searchSources($content);
//search CSS classes that applied on page
$this->CSSclasses = $this->searchCSSClasses($content);

// print_r( $arr_clss );
$arr_src2 = $this->searchSourcesOnCSS($content);
$arr_src3 = $this->searchCSSLinks($content);

if(!empty($arr_src3))
$arr_src4 = $this->searchSourcesOnCSSFiles($arr_src3);
//search on frames if exists
$arr_src5 = $this->searchFrames($content);
if(!empty($arr_src5))
$arr_src6 = $this->searchSourcesOnFrames($arr_src5);

$arr_sources = array_merge ($arr_src1, $arr_src2, $arr_src3,
$arr_src4, $arr_src5, $arr_src6);

return $this->resolvePathSources($arr_sources);

}


function searchSources($str) {
preg_match_all("/[img|input|embed|script]+.*[\s]*(src|background)[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $arr_source);
return $arr_source[2];
}


function searchCSSClasses($str) {
preg_match_all("/class[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $arr_source);
return $arr_source[1];
}

function searchFrames($str) {
preg_match_all("/frame.*[\s]*src[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $arr_source);
return $arr_source[1];
}


function xsearchSourcesOnCSS($str) {
preg_match_all("/(url\(\"?([^\")]+))/ie", $str, $arr_source);
return $arr_source[2];
}


function searchSourcesOnCSS($str) {

preg_match_all("/(\.(.*)\s+\{[\s]+)*.*url\(\"?([^\")]+)/ie", $str, $arr_source);
for($i=0; $i<count($arr_source);$i++) {
if( in_array( $arr_source[2][$i], $this->CSSclasses )) {
$arr_sources[] = $arr_source[3][$i];
}
}
return $arr_sources;
}

/*
* searching webpage elements on frames
*/
function searchSourcesOnFrames($framefiles) {
$arr_source = array();
$arr_sources = array();
while(list(,$src) = each($framefiles)) {

$framepage = $this->makeAbsolutePath($src,$this->baseurl);

$page = new WebpageSize;
if($this->proxy)
$page->setURLviaProxy($framepage, $this->proxy, $this->proxyport);
else
$page->setURL($framepage);


$arr_source = $page->grabPageSources();
$arr_sources = array_merge($arr_sources, $arr_source);
}
return $arr_sources;
}


function searchSourcesOnCSSFiles($cssfiles) {

//search sources on CSS file
$arr_CSSlinks = array();
while(list(,$src) = each($cssfiles)) {
$numstepback = substr_count($src, "../");
$CSSpage = $this->makeAbsolutePath($src,$this->baseurl);

$CSScontent = $this->getContent($CSSpage);
$arr_sourcelink = $this->searchSourcesOnCSS($CSScontent);
if(empty( $arr_sourcelink )) continue;

while(list(,$srclink) = each($arr_sourcelink)) {
$arr_CSSlink[] = str_repeat("../",$numstepback) . $srclink;
}
$arr_CSSlinks = array_merge($arr_CSSlinks, $arr_CSSlink);
}
return $arr_CSSlinks;

}


function searchCSSLinks($str) {
preg_match_all("/<link[^>]+href[\040]*=[\040]*[\"|\'|\\\\]*([^\'|\"|>|\040]*(.*)\.css)[\"|\'|>|\040|\\\\]*/ie",$str, $arr_CSSlink);
return $arr_CSSlink[1];
}


function makeAbsolutePath ($src,$url) {

$addone = 1;

if ($this->tailfile) {
$url = substr($url, 0, -(strlen($this->tailfile)+1));
$addone = 0;
}

if (strtolower(substr($src,0,4)) != 'http') {

$numrel = substr_count($src, "../");
$src = str_replace("../","",$src);

for($i=0; $i < $numrel+$addone; $i++) {
$lastslash = strrpos($url,"/");
$url = substr($url, 0, $lastslash);
}

return $url.'/'.$src;
}
else return $src;
}

}
?>
  #2 (permalink)  
Antiguo 20/02/2007, 04:34
 
Fecha de Ingreso: febrero-2007
Mensajes: 2
Antigüedad: 13 años, 5 meses
Puntos: 0
Re: Funcion para calcular el tamaño de las paginas 1

Despues lo ejecuto con este fichero (sample.webpagesize.php)

<?php
//don't forget to set time limit, because this class eats a lot of time :)
set_time_limit(900);
include("class.webpagesize.php");

$size = new WebpageSize;
//$size->setURLviaProxy("Pagina Web","PROXY03","80");
$size->setURL("Pagina web");
$size->printResult();


?>





Al ejecutar me da los siguientes errores:

Warning: array_merge() [function.array-merge]: Argument #2 is not an array in C:\AppServ\www\class.webpagesize.php on line 226

Warning: Variable passed to each() is not an array or object in C:\AppServ\www\class.webpagesize.php on line 155

Warning: array_unshift() [function.array-unshift]: The first argument should be an array in C:\AppServ\www\class.webpagesize.php on line 63


A ver si me podeis echar una mano. Gracias
Atención: Estás leyendo un tema que no tiene actividad desde hace más de 6 MESES, te recomendamos abrir un Nuevo tema en lugar de responder al actual.
Respuesta




La zona horaria es GMT -6. Ahora son las 11:31.