View file sitemap/.function.inc.php

File size: 22.21Kb
<?php
/*
#
# +------------------------------------------------------------------------------+
# | Sitemap Creator 0.2 beta  build 20080514                                     |
# +------------------------------------------------------------------------------+
# | Sitemap Creator crawls/spiders your website, creates XML sitemaps, pings     |
# | Search Engine bots to crawl them, tracks bots and sends reports by mail.     |
# +------------------------------------------------------------------------------+
# | Email         [email protected]                                          |
# | Web           www.gadelkareem.com                                            |
# |                                                                              |
# | Before using, modifying or publishing this class, you should refer to the    |
# | GNU GENERAL PUBLIC LICENSE. This is available at:                            |
# | http://www.gnu.org/licenses/gpl.txt                                          | 
# +------------------------------------------------------------------------------+
# | .functions.inc.php : Functions , do not edit                                 |
# +------------------------------------------------------------------------------+
#
*/

#project pages linked from the generated HTML
define('SMC_SM_URL', 'http://gadelkareem.com/2008/05/15/sitemap-creator-02-beta/');
define('SMC_URL', 'http://gadelkareem.com/');
#site host name without a leading "www.", escaped for embedding in the '#'-delimited
#patterns used by valid_link(); the delimiter is passed so a '#' is escaped on every PHP version
define('SMC_REGEX_SITENAME', preg_quote(preg_replace( '#^www\.(.*)$#i', '\\1', SMC_SITE), '#') );



#builds $disabled_dirs, a non-capturing alternation such as (?:/dir1|/dir2), from the
#'@-@'-separated SMC_DISABLED_DIRS list; valid_link() appends it to the site-name pattern
if( SMC_DISABLED_DIRS ){
	$tmp_arr = array();
	foreach( explode('@-@', SMC_DISABLED_DIRS) as $dir ){
		if( !empty($dir) )
			$tmp_arr[] = "/{$dir}";
	}
	#when every entry is empty fall back to (?!) which never matches, instead of a broken pattern
	$disabled_dirs = $tmp_arr ? '(?:' . implode('|', $tmp_arr) . ')' : '(?!)';
	unset($tmp_arr, $dir);
}


if(!function_exists('file_put_contents')) {
	if (!defined('LOCK_EX')) define('LOCK_EX', 2);
	/*
	 Fallback used only when PHP has no native file_put_contents().
	 Truncates/creates $filename, takes an exclusive lock and writes $content.
	 $flags is accepted for signature compatibility but not interpreted.
	 Returns the number of bytes written, or false on failure.
	*/
	function file_put_contents($filename, $content, $flags = null) {
		if ( ( $fh = @fopen($filename, 'wb') ) === false) return false;
		if (!@flock($fh, LOCK_EX)) {
			#do not leak the handle when the lock cannot be obtained
			@fclose($fh);
			return false;
		}
		$written = @fwrite($fh, $content);
		#closing the handle also releases the lock
		@fclose($fh);
		return $written;
	}
}
if (!function_exists('file_get_contents')) {
	/*
	 Fallback used only when PHP has no native file_get_contents().
	 Returns the whole content of $filename as a string, or false when it cannot be opened.
	*/
	function file_get_contents($filename) {
		$handle = @fopen($filename, 'rb');
		if ($handle === false) {
			return false;
		}
		clearstatcache();
		$length = @filesize($filename);
		if ($length) {
			#size is known: read it in one call
			$buffer = fread($handle, $length);
		} else {
			#size unknown or zero: read until end of file
			for ($buffer = ''; !feof($handle); ) {
				$buffer .= fread($handle, 8192);
			}
		}
		fclose($handle);
		return $buffer;
	}
}
/*
 File cache keyed by md5($name) inside SMC_DATA_CACHE.
 - non-empty $val : stores serialize($val); returns true on success
 - empty $val     : returns the unserialized entry while it is younger than SMC_CACHE_TTL
 - $checking      : only reports (true/false) whether a fresh entry exists
 Returns false when caching is disabled, the directory cannot be created, or nothing usable is cached.
*/
function & _cache($name, $val=0, $checking = false){
	$result = false;
	if( !SMC_USE_CACHE )
		return $result;
	clearstatcache();
	if( !is_dir(SMC_DATA_CACHE) && !@mkdir(SMC_DATA_CACHE,0777) )
		return $result;
	$path = SMC_DATA_CACHE . md5($name);

	if( !empty($val) ){
		#store
		if( @file_put_contents($path,serialize($val),LOCK_EX) )
			$result = true;
		return $result;
	}

	#lookup
	$fresh = file_exists($path) && time()-filemtime($path) < SMC_CACHE_TTL;
	if( !$fresh )
		return $result;
	if( $checking ){
		$result = true;
		return $result;
	}
	$cont = @unserialize(@file_get_contents($path));
	if( $cont )
		return $cont;
	_error('Could not get cache!',  'WARNING');
	return $result;
}

/*
 Retrieves a URL over a raw socket.
 $url     : absolute URL (passed by reference, not modified here)
 $nocache : when true the cache is neither read nor written
 Returns (by reference) one of:
   - array('content' => body[, 'lastmod' => timestamp]) on success
   - array('new_url' => target[, 'lastmod' => ...]) when a Location header is seen
   - false on connection failure, non-text content type or an HTTP status other than 200/301/302
*/
function & get_content(&$url , $nocache=false){
	if( !$nocache && $cont =& _cache($url) ) return $cont;
	#on a cache miss $cont is bound to a false value; start from a clean array instead
	unset($cont);
	$cont = array();
	$fbug = false;
	$user_agent = "Sitemaps Creator ".SMC_VERSION."(compatible; sitemapcreatorbot/".SMC_VERSION."; +http://gadelkareem.com/) ";
	ini_set('user_agent', $user_agent);
		
	$parse_url = & _parse_url($url);
	if( empty($parse_url['host']) ){
		_error( 'Could not parse URL <strong>'.$url.'</strong>', 'WARNING');
		return $fbug;
	}
	#https needs the ssl:// transport; a plain TCP socket to port 443 cannot speak HTTP
	$host = $parse_url['host'];
	if( isset($parse_url['scheme']) && strtolower($parse_url['scheme']) == 'https' )
		$host = 'ssl://' . $host;
	if ( ($fp = @fsockopen($host, $parse_url['port'], $errno, $errstr, SMC_CONNECT_TIME_OUT)) === false) {
		#each case breaks so that only the matching message is reported
		switch($errno) {
			case -3: _error( 'Socket creation failed (-3)', 'WARNING'); break;
			case -4: _error( 'DNS lookup failure (-4)', 'WARNING'); break;
			default: _error( 'Connection failed ('.$errno.') '.$errstr, 'WARNING');
		}
		return $fbug;
	}
	stream_set_timeout($fp, SMC_CONNECT_TIME_OUT);

	$get = "GET {$parse_url['path']}{$parse_url['query']} HTTP/1.1\r\n";
	$get .= "Host: {$parse_url['host']}\r\n";
	$get .= "User-Agent: ".trim($user_agent)."\r\n";
	$get .= "Accept-Encoding: gzip,deflate\r\n";
	#$get .= "Referer: {$parse_url['scheme']}://{$parse_url['host']}{$parse_url['path']}\r\n";
	$get .= "Connection: close\r\n\r\n";
	stream_set_blocking($fp, true);
	fwrite($fp, $get);

	#read the response headers up to the first empty line
	$chunk = 0;
	while ( '' != ($line=trim((string)fgets($fp))) ) {
		if ( false !== ($pos=strpos($line, ':')) ){
			$header = strtolower(trim(substr($line, 0, $pos)));
			$raw    = trim(substr($line, $pos+1));	#original case, needed for URLs
			$val    = strtolower($raw);
			
			if(SMC_CALC_LAST_MODIFIED && ( $header == 'date' || $header == 'last-modified' )  ){
				$cont['lastmod'] = strtotime($raw);
			}elseif ( $header == 'content-type' && strpos($val, 'text') === false ){
				_error("Document type is <strong>{$val}</strong> for URL <strong>{$url}</strong>");
				fclose($fp);
				return $fbug;
			}elseif ( $header == 'transfer-encoding' &&  $val  == 'chunked'){
				$chunk = 1;
			}elseif ( $header == 'content-encoding' &&  ( $val  == 'gzip' || $val  == 'deflate' )){
				$gzipped = true;
			}elseif (  $header == 'location' ){ #redirection
				#keep the target's original case: paths may be case-sensitive
				_error( 'Page <strong><a href="'.$url.'">'.$url.'</a></strong> is redirecting to <strong><a href="'.$raw.'">'.$raw.'</a></strong>', 'WARNING');
				$cont['new_url'] = $raw;
				fclose($fp);
				return $cont;
			}
		}elseif( preg_match('#(?:^|\s)(?!200|302|301)([0-9]{3})(?:$|\s)#', $line, $code) ){
			#status line with a code other than 200/301/302; link to the matching RFC 2616 section
			_error("<a href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.".($code[1][0]).".".($code[1][2]+1)."\" target=\"_blank\" ><strong>{$code[0]} Error</strong></a> Found for URL <strong>{$url}</strong>", 'WARNING');
			fclose($fp);
			return $fbug;
		}
	}
	
	#read the body; $chunk > 0 is the remaining size of the current chunk, -1 means "not chunked"
	$page = '';
	$chunk = !empty($chunk) ? hexdec(trim((string)fgets($fp))) : -1;
	
	while ($chunk != 0 && !feof($fp) ) {
		if ($chunk > 0){
			 $part = fread($fp, $chunk);
			 $chunk -= strlen($part);
			 $page .= $part;
			 if ($chunk == 0){
				 if (fgets($fp) != "\r\n") 
					_error( 'Chunk decoding error on <strong><a href="'.$url.'">'.$url.'</a></strong>', 'DEBUG');
				 $chunk = hexdec(trim((string)fgets($fp)));
			 }
		} else {
			 $page .= fread($fp, 1024);
		}
	}
	fclose($fp);

	#only a plain gzip stream (10-byte header without optional fields) is inflated; anything else is kept as received
	if( isset($gzipped) && (substr($page, 0, 8) == "\x1f\x8b\x08\x00\x00\x00\x00\x00") )
		$cont['content'] = gzinflate( substr($page, 10) );
	else
		$cont['content'] = $page;
	if(!$nocache) 
		_cache($url, $cont);
	

	return $cont;
}



/*
 Recursive crawler: validates $url, fetches it, records it in the global $urls
 and then crawls every acceptable link found in the page.
 $depth : recursion depth of this page (used by priority mode 1)
 $pr    : priority contribution computed by the page that linked here
 Stops silently once the time, memory or URL limits are exceeded.
*/
function crawl($url, $depth=1, $pr=0 ){
	global $urls, $start_time, $redirect;
	static $redirected = 0;

	#budget checks
	if( _time()-$start_time > SMC_CRAWL_TIME_OUT
		|| _mem() > SMC_MEMORY_LIMIT
		|| ( SMC_URLS_LIMIT && count($urls) > SMC_URLS_LIMIT ) ){
		return;
	}

	$url = valid_link($url);
	if( !$url )
		return;

	#already recorded: in mode 1 every additional inbound link raises the priority
	if( isset($urls[$url]) ){
		if( SMC_CALC_PERIORITY == 1 )
			$urls[$url]['priority'] += $pr;
		return;
	}

	if( blacklist($url ,true) || isset($redirect[$url]) )
		return;

	$cont =& get_content($url);

	if( isset($cont['new_url']) ){
		#follow the redirect unless the chain got too long
		#NOTE(review): SMS_MAX_REDIRECT (not SMC_) is what the code reads - confirm it is defined in the config
		$redirect[$url] = 1;
		if( $redirected < SMS_MAX_REDIRECT ){
			$redirected++;
			crawl($cont['new_url'], $depth, $pr);
		}
		return;
	}
	if( empty($cont['content']) ){
		blacklist($url, false, 'Empty Page');
		return;
	}
	$redirected = 0;

	$has_lastmod = SMC_CALC_LAST_MODIFIED && !empty($cont['lastmod']);
	$urls[$url] = array(
		'lastmod' 	 => $has_lastmod ? $cont['lastmod'] : time(),
		'priority'   => $pr,
		);

	#<base> tag , only to use with make_link()
	preg_match('#<base\s+[^>]*href\s*=\s*["\']?([^:]+://[^"\'\#\s>]+)#i', $cont['content'], $base ) ;
	$origin = !empty($base[1]) ? $base[1] : $url;

	#selecting valid URLs from current page content
	preg_match_all(
	'#<\s*(?:a|frame|iframe|form)[^>]*\s+(?:href|src|URL|action)\s*=\s*["\']?(?!mailto:|news:|javascript:|ftp:|telnet:|callto:|ed2k:)([^"\'\#\s>]+)#is', 
					$cont['content'], $matches);

	$links = array();
	foreach( $matches[1] as $sub ){
		$link = valid_link($origin, $sub);
		if( $link )
			$links[] = $link;
	}

	if( !$links ){
		_error( 'No Links were found on page <strong><a href="' . $url . '">' . $url . '</a></strong>', 'DEBUG');
		return;
	}

	#mode 1: position on the page and depth; mode 3: URL length and number of '/' parts
	$max = count($links);
	foreach( $links as $i => $link ){
		if( SMC_CALC_PERIORITY == 1 )
			$p = atan( (($max-$i)/$max) + atan($depth) );
		elseif( SMC_CALC_PERIORITY == 3 )
			$p = (strlen($link)/100) + count(explode('/', $link));
		else
			$p = 0;
		crawl($link, $depth+1, $p);
	}
}
/*
 Resolves $sub against $url and checks that the result may be crawled.
 Returns the absolute URL, or false for images/css/js, disabled directories
 and URLs that do not belong to the configured site (any sub-domain is accepted).
*/
function valid_link($url, $sub=''){
	#excluding graphics
	$is_asset = !empty($sub) && preg_match('#\.(ico|png|jpg|gif|css|js)(\?.*)?$#i', $sub);
	if( $is_asset )
		return false;

	$url = make_link( $url , $sub);

	#disabled URLs
	if( SMC_DISABLED_DIRS ){
		$disabled = '#' . SMC_REGEX_SITENAME . $GLOBALS['disabled_dirs'] . '#i';
		if( preg_match($disabled, $url) )
			return false;
	}

	#crawl upper level sub-domains too
	$on_site = '#^https?://(www\.)?([^/]+\.)*' . SMC_REGEX_SITENAME . '/[^\#]*$#i';
	if( !preg_match($on_site, $url) ){
		_error('URL <strong>"<a href="' . $url . '">' . $url . '</a>"</strong> Not Valid. ', 'DEBUG');
		return false;
	}
	return $url;
}
/*
 Builds an absolute URL from a page URL and a link found on that page.
 $url : absolute URL of the page (or of its <base href>)
 $sub : link as written in the page; absolute, root-relative ("/x"),
        parent-relative ("../x"), current-dir ("./x") or plain relative ("x")
 Both values are normalised with cleanurl() first.
*/
function make_link($url,$sub=''){
	$sub = cleanurl($sub);
	$url = cleanurl($url);
	
	#absolute link: only re-assemble it from its parsed parts
	if( strpos($sub, '://') !== false && ($parse_url = & _parse_url($sub)) ){
		return $parse_url['scheme'].'://'.$parse_url['host'].$parse_url['path'].$parse_url['query'];
	}
	$parse_url = & _parse_url($url);
	
	if($sub == '' )
		return $parse_url['scheme'] . '://' . $parse_url['host'] . $parse_url['path'] . $parse_url['query'];
		
	if($sub == '/' )
		return $parse_url['scheme'] . '://' . $parse_url['host'] . '/';
		
	#consume leading "./" and "../" segments; only ".." climbs one directory
	while( preg_match('#^/?(\.+)/(.+)#', $sub, $match) ){
		$sub = $match[2];
		if( $match[1] != '.' )
			$url = preg_replace('#(/[^/]+)(?:/[^/]*)$#','\\1',$url);
		$match = array();
	}
	
	#root-relative link
	if( $sub[0] == '/' )
		return $parse_url['scheme'].'://'.$parse_url['host'].$sub;
	
	#plain relative link: replace the last path segment of the page URL
	$url = preg_replace('#/[^/]*$#','/',$url);

	return $url.$sub;

}
/*
 Normalises a URL string: decodes &amp;, collapses a doubled slash that is
 not part of "://", and strips sid=... session parameters and a trailing "?".
*/
function cleanurl($url){
	$decoded   = str_replace('&amp;', '&', $url);
	$collapsed = preg_replace("#([^:]+)//#i", "\\1/", $decoded);
	$stripped  = preg_replace("#(sid=[^;&]+)|(\?$)#i", '', $collapsed);
	return $stripped;
}
/*
 Converts the raw priorities collected by crawl() into sitemap priorities
 (clamped to SMC_MIN_PERIORITY..1) and, when SMC_CALC_FREQUENCY is set, derives
 a change frequency from the final priority. Works on the global $urls.
 Modes (SMC_CALC_PERIORITY): 1/default = raw/max, 2 = position in list, 3 = min/raw.
*/
function calc(){
	if( !SMC_CALC_PERIORITY ) return;
	global $urls;
	$tot = count($urls);
	$i = 0;

	#smallest and largest non-zero raw priority
	$max = $min = null;
	foreach($urls as $arr){
		$raw = $arr['priority'];
		if( $raw == 0 ) continue;
		if( $max === null ){
			$max = $min = $raw;
		}elseif( $raw > $max )
			$max = $raw;
		elseif( $raw > 0 && $raw < $min )
			$min = $raw;
	}
	foreach($urls as $url => $arr){
		$raw = $arr['priority'];

		#a zero divisor yields 0 (clamped to the minimum below), as the suppressed
		#division did on PHP 5, instead of a DivisionByZeroError on PHP 8
		switch( SMC_CALC_PERIORITY ){
			case 1:
			default:
				$p = $max ? round( ( $raw/$max ) , 3) : 0;
			break;
			case 2:
				$p = round( ($tot-$i)/$tot , 3) ;
			break;
			case 3:
				$p = $raw ? round( ($min / $raw) , 3) : 0;
			break;
		}
		$urls[$url]['priority'] = $p > 1 ? 1 : ( $p < SMC_MIN_PERIORITY ? SMC_MIN_PERIORITY : $p );
		$p = $urls[$url]['priority'];
		if(SMC_CALC_FREQUENCY){
			if( $p >= 0.8 )
				$urls[$url]['freq'] = 'hourly';
			elseif( $p >= 0.4 )
				$urls[$url]['freq'] = 'daily';
			elseif( $p >= 0.2 )
				$urls[$url]['freq'] = 'weekly';
			elseif( $p >= 0.1 )
				$urls[$url]['freq'] = 'monthly';
			else
				$urls[$url]['freq'] = 'yearly';
		}
		$i++;
	}
}

/*
 Debug helper: prints $val HTML-escaped inside <pre> between two rules.
 false/true/0/''/NULL are spelled out, arrays and objects are var_export()ed
 and the path of an existing file is replaced by that file's content.
*/
function _echo($val){
	echo "<hr /><pre >";
	switch( true ){
		case $val === false:
			$text = "false";
			break;
		case $val === true:
			$text = "true";
			break;
		case $val === 0:
			$text = "zero";
			break;
		case $val === '':
			$text = "empty string";
			break;
		case $val === NULL:
			$text = "NULL";
			break;
		case is_array($val) || is_object($val):
			$text = var_export($val,1);
			break;
		case is_file($val):
			$text = file_get_contents($val);
			break;
		default:
			$text = $val;
	}
	echo htmlspecialchars($text);
	echo "</pre><hr />";
}
/*
 Prints a message once per request and always returns false, so callers can
 "return _error(...)".
 $title 'DEBUG' messages are shown when SMC_SHOW_DEBUG is set, every other
 title when SMC_SHOW_ERROR is set. A message text already printed is skipped.
*/
function _error($msg, $title='NOTICE'){
	static $msg_array = array();
	$enabled = (SMC_SHOW_ERROR && $title != 'DEBUG') || (SMC_SHOW_DEBUG && $title == 'DEBUG');
	#the duplicate check applies to every title, not only to DEBUG messages
	if( $enabled && !isset($msg_array[$msg]) ){
		echo "<div><strong>{$title}: </strong>{$msg}</div>";
		$msg_array[$msg] = 1;
	}
	return false;
}
/*
 Blacklist of URLs that failed, kept as empty marker files named md5($url)
 inside SMC_DATA_ERRORS.
 $check = true  : returns whether a marker younger than SMC_CACHE_TTL exists
 $check = false : creates/refreshes the marker and logs $reason as a DEBUG message
 Always returns false when SMC_USE_BLACKLIST is off.
*/
function blacklist(&$url,$check=false, $reason=''){
	if( !SMC_USE_BLACKLIST )
		return false;
	$marker = SMC_DATA_ERRORS.md5($url);
	clearstatcache();

	if( $check ){
		if( !file_exists($marker) )
			return false;
		return (time()-filemtime($marker)) < SMC_CACHE_TTL;
	}

	if( !is_dir(SMC_DATA_ERRORS) && !@mkdir(SMC_DATA_ERRORS,0777) )
		return _error('Could not create error directory, please check if cache directory is writable', 'WARNING');
	@touch($marker);
	_error('URL <strong>"<a href="'.$url.'">'.$url.'</a>"</strong> Blacklisted. Reason : ' . $reason, 'DEBUG');
}
/* Current Unix time in seconds as a float, including the microsecond fraction. */
function _time()
{
   $parts = explode(" ", microtime());
   $fraction = (float)$parts[0];
   $seconds  = (float)$parts[1];
   return ($fraction + $seconds);
}
/*
 Memory used by the script in megabytes (two decimals): peak usage when PHP
 can report it, otherwise current usage; false when neither function exists.
*/
function _mem() {
	foreach( array("memory_get_peak_usage", "memory_get_usage") as $probe ){
		if( function_exists($probe) )
			return round($probe(true) / 1024 / 1024,2);
	}
	return false;
}
/*
 parse_url() wrapper that always provides 'port', 'path' and 'query'.
 - port  : 443 for https, otherwise 80, when the URL names none
 - path  : '/' when empty
 - query : '' or '?' followed by the re-encoded query string
 Returns (by reference) the parts array, or parse_url()'s falsy result for a malformed URL.
*/
function & _parse_url(&$url){
	$p = parse_url($url);
	if(!$p)
		return $p;
	if ( empty($p['port']) ) {
		$p['port'] = ( isset($p['scheme']) && $p['scheme'] == 'https' ) ? 443 : 80;
	}
	if ( empty($p['path']) ) {
		$p['path'] = '/';
	}
	#code from http://enarion.net/google/ crawler class
	$query_encoded = '';
	if (!empty($p['query']) ) {
		$pairs = array();
		foreach (explode('&', $p['query']) as $quer) {
			#split on the first '=' only, so values that contain '=' stay intact
			$v = explode('=', $quer, 2);
			if ( isset($v[1]) && $v[1] !== '' ) {
				#decode first so already-encoded values are not encoded twice
				$pairs[] = $v[0].'='.rawurlencode(urldecode($v[1]));
			} else {
				$pairs[] = $v[0];
			}
		}
		$query_encoded = str_replace('%2B','+', '?' . implode('&', $pairs));
	}
	$p['query'] = $query_encoded;
	return $p;
	
}
/*
 Counts or deletes the files of a data directory.
 $count : true  = only return the number of files found
          false = delete them, print a report and return 0
 $dir   : 1 = SMC_DATA_CACHE, anything else = SMC_DATA.'errors/'
 Sub-directories and an entry named "test" are never touched.
*/
function clean_dir($count=false, $dir=1){
	$dir = $dir == 1 ? SMC_DATA_CACHE : SMC_DATA.'errors/';
	$handle = @opendir($dir);
	if( !$handle )
		return 0;

	$found = 0;
	$deleted = 0;
	while (false !== ($file = readdir($handle))) {
		if ($file == "." || $file == ".." || $file == "test" || is_dir($dir.$file))
			continue;
		$found++;
		if(!$count && @unlink($dir.$file)){
			echo "{$dir}{$file} <font color=red>deleted</font><br>";
			$deleted++;
		}
	}
	#close before any return so the handle is released in count mode too
	closedir($handle);

	if( $count )
		return $found;

	if(!$found) echo '<strong>No cache files found in '.$dir.'</strong>';
	else echo '<strong>'.$deleted.' files deleted successfully</strong>';
	return 0;
}
/*
 Prints the admin overview: a table of the sitemap files found in
 SMC_DATA_SITEMAPS, one page (SMC_PER_PAGE rows, page taken from $_GET['pg'])
 of the crawled URLs in the global $urls, the page navigation and the cron URL.
*/
function draw_table(){
	global $urls;
	$sitemaps = array();
	clearstatcache();
	if ($handle = @opendir(SMC_DATA_SITEMAPS)) {
		while (false !== ($file = readdir($handle))) {
		   #"default" only stores the name of the latest sitemap
		   if ($file != "." && $file != ".." && $file != "default") {
				$sitemaps[] = $file;
		   }
		}
	    closedir($handle);
	}
	if(!empty($sitemaps)){
		echo '<p>Displaying '.count($sitemaps).' Sitemaps found on '.SMC_SITE.'</p>';
		echo '<table width="100%" border="1" cellspacing="0" cellpadding="0">
		  <tr>
			<th scope="col" width="40%">Sitemap</th>
			<th scope="col" width="30%">Date Created</th>
			<th scope="col" width="30%">Ping \'em</th>
		  </tr>';
		foreach($sitemaps as $sm){
			echo "<tr>
				<td><div><a href=\"".SMC_SELF."?do=showsitemap&sm={$sm}.xml.gz\">sitemap{$sm}.xml.gz</a></div></td>
				<td>".date( "H:i:s Y-m-d",filemtime(SMC_DATA_SITEMAPS.$sm) )."</td>
				<td><a href=\"".SMC_SELF."?do=ping&sm={$sm}\">Ping</a></td>
			  </tr>";
		}
		echo '</table>';
	}

	if( !empty( $urls ) ){
		$st = 3; #how many number we skip before putting '....';
		$max = count($urls); #counting the array we're displaying
		$tp = (int)ceil($max/SMC_PER_PAGE); #total pages
		
		###here goes the page; the request value is forced into 1..$tp
		$pg = isset($_GET['pg']) ? intval($_GET['pg']) : 1;
		if( $pg < 1 ) $pg = 1;
		if( $pg > $tp ) $pg = $tp;
		$offset = ($pg-1)*SMC_PER_PAGE;
		$nlimit= $max > $pg*SMC_PER_PAGE ? $pg*SMC_PER_PAGE : $max;
		
		echo '<p>Displaying ' . ( $nlimit - $offset ) . ' of '. $max .' URLs found on '. SMC_SITE .'</p>';
		echo '<table width="100%" border="1" cellspacing="0" cellpadding="0">
		  <tr>
			<th scope="col">URL</th>
			<th scope="col" width="5%">Priority</th>
			<th scope="col" width="5%">Last Modified</th>
			<th scope="col" width="5%">Frequency</th>
		  </tr>';
		#exactly the rows $offset .. $nlimit-1, matching the count announced above
		foreach( array_slice($urls, $offset, $nlimit - $offset, true) as $url => $arr ){
			$href = htmlspecialchars($url);
			$freq = isset($arr['freq']) ? $arr['freq'] : '';
			echo "<tr>
				<td><div><a href=\"{$href}\" target=\"_blank\">{$href}</a></div></td>
				<td>{$arr['priority']}</td>
				<td>".gmdate("Y-m-d\TH:i:s",$arr['lastmod'])."</td>
				<td>{$freq}</td>
			  </tr>";
		}
		echo '</table>';
		#the numbers
		echo '<div id="numbers">';
		$st++;
		for($x=1; $x<($tp+1); $x++){
			#with many pages, skip ahead and print '....' around the current page window
			if($tp > ($st*2)){
				if(($x==$st+1 && $pg > ($st*2))){
					$x = $pg-$st;
					echo '....';
				}elseif($x==($pg+$st) && ($tp-$pg) > ($st*2)){
					$x = $tp-$st;
					echo '....';
				}
			}
			echo $x==$pg ? "<u>{$x}</u>" :"<a href=\"{$_SERVER['PHP_SELF']}?pg={$x}\" >{$x}</a>";
		}
		echo '</div>';

		echo '<br /><strong>Use this URL to add to your crontab or schedule tasks :</strong> <br /><div class="secure">http://'.SMC_SITE.SMC_SELF.'?do=createsitemap&secure='.md5(SMC_PASS).'</div>';

	}
}
/*
 Persists the global $urls as a tab-separated file named SMC_SITE in SMC_DATA_SITES.
 $read = true  : loads the file into $urls; returns false when it does not exist, nothing otherwise
 $read = false : writes $urls; returns true on success, false when $urls is empty or on I/O failure
 Line format: url <TAB> priority <TAB> lastmod <TAB> freq
*/
function csv( $read=false ){
	global $urls;
	clearstatcache();
	$path = SMC_DATA_SITES.SMC_SITE;

	if( $read ){
		if( !@file_exists($path) )
			return false;

		foreach( file($path) as $line ){
			$fields = explode("\t", $line);
			$loc = trim($fields[0]);
			$urls[$loc] = array(
				'priority' => floatval($fields[1]),
				'lastmod'  => intval($fields[2]),
				'freq'     => trim($fields[3]),
			);
		}
		return;
	}

	if( empty($urls) )
		return false;

	$rows = '';
	foreach( $urls as $url => $arr ){
		$rows .= "{$url}\t{$arr['priority']}\t{$arr['lastmod']}\t{$arr['freq']}\n";
	}

	if( !is_dir(SMC_DATA_SITES) && !@mkdir(SMC_DATA_SITES,0777) )
		return false;
	if( !@file_put_contents($path, $rows, LOCK_EX) )
		return false;

	@chmod($path,0777);
	return true;
}



/*
 Writes the global $urls as a gzip-compressed XML sitemap named after today's
 date (Ymd) into SMC_DATA_SITEMAPS, records that name in the "default" file
 and pings the search engines through ping_em().
 Returns false when there is nothing to write or the file cannot be created.
*/
function create_sitemap(){
	global $urls;
	
	clearstatcache();
	if( empty($urls) )
		return _error('No URLs to write to the sitemap', 'WARNING');
	if( !is_dir(SMC_DATA_SITEMAPS) )
		if( !@mkdir(SMC_DATA_SITEMAPS,0777) )
			return false;
	
	
	$sm = date("Ymd",time());
	
	#schemaLocation points at the published 0.9 schema (sitemap/0.9/sitemap.xsd)
	$cont = '<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="'.SMC_SELF.'?do=gss.xsl"?><urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"	xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
	
	foreach($urls as $url => $arr){
		$cont.='<url>';
		#NOTE(review): htmlentities() can emit named HTML entities that XML does not define - confirm URLs are ASCII
		$cont.='<loc>'.utf8_encode(htmlentities($url, ENT_QUOTES )).'</loc>';
		if(!empty($arr['lastmod'])) $cont.="<lastmod>".gmdate("Y-m-d\TH:i:s",$arr['lastmod'])."+00:00</lastmod>";
		if(!empty($arr['freq'])) $cont.="<changefreq>{$arr['freq']}</changefreq>";
		$cont.="<priority>{$arr['priority']}</priority>";
		$cont.='</url>';
	}
	$cont.='</urlset>';
		
	$fh = @gzopen( SMC_DATA_SITEMAPS . $sm, 'wb');
	if( !$fh )
		return _error('Could not write sitemap file <strong>'.SMC_DATA_SITEMAPS.$sm.'</strong>', 'WARNING');
	gzwrite( $fh, $cont);
	gzclose($fh);
	if(@file_put_contents( SMC_DATA_SITEMAPS . 'default', $sm))
		echo "<a href=\"".SMC_SELF."?do=showsitemap&sm={$sm}.xml.gz\">sitemap{$sm}.xml.gz</a> Created successflly with ".count($urls)." URLs<br />";
	ping_em($sm);
		
	
}

/*
 Sends a stored sitemap to the client and exits.
 The file is chosen by the numeric $_GET['sm'] (the ".xml.gz" suffix is dropped
 by intval) or, without it, by the name stored in the "default" file.
 Clients that accept gzip get the compressed file as is, others get it inflated.
 When the sitemap does not exist a 404 is sent, a mail is triggered and the function returns.
*/
function get_sitemap(){

	if( !empty($_GET['sm']) && intval($_GET['sm']) )
		$sm = intval($_GET['sm']);
	else
		$sm = trim((string)@file_get_contents( SMC_DATA_SITEMAPS .'default'));
	$sm = SMC_DATA_SITEMAPS.$sm;
	clearstatcache();
	#is_file, not file_exists: with no "default" file $sm is the directory itself
	if(@is_file($sm))
	{
		@ini_set('zlib.output_compression', '0');
		@ini_set('zlib.output_compression_level', '-1');

		@header('Content-type: text/xml; charset=UTF-8');
		@header("Expires: " . gmdate("D, d M Y H:i:s",time()+(60*60*24)) . " GMT");
		if(isset($_SERVER['HTTP_ACCEPT_ENCODING']) && strpos($_SERVER['HTTP_ACCEPT_ENCODING'],'gzip')===false){
			readgzfile($sm);
		}else{
			@header('Content-Length: '.filesize($sm));
			@header('Content-Encoding: gzip' );
			#lighttpd can stream the file itself
			if(isset($_SERVER["SERVER_SOFTWARE"]) && strpos($_SERVER["SERVER_SOFTWARE"], 'lighttpd') !==false)
				@header("X-LIGHTTPD-send-file: {$sm}");
			else
				@readfile($sm);
		}
		if(isset($_SERVER['HTTP_USER_AGENT']) && preg_match("#(msnbot|Lycos_Spider|eMiragorobot|Slurp|Ask Jeeves|WebCrawler|Scooter|googlebot)#si", $_SERVER['HTTP_USER_AGENT']))
			_mail('Sitemap crawled', $sm." has been viewed");

		exit;
	
	}else{
		_mail('Sitemap not found', $sm.' could not be found');
			
		@header('Cache-Control: no-cache');
		#status line for module SAPIs, Status header for CGI
		@header('HTTP/1.1 404 Not Found');
		@header('Status: 404 Not Found');
		echo 'Sitemap file not found';

		return;
	}
}
/*
 Submits the public URL of sitemap $sm to every service in the global $pings
 (name => ping URL prefix), then mails and prints what each one answered.
 Does nothing when SMC_PING is off.
*/
function ping_em($sm){
	if(!SMC_PING) return;
	global $pings;
	$sm = urlencode( SMC_SCHEME . SMC_SITE . "/sitemap.php?do=showsitemap&sm={$sm}.xml.gz");
	$out = '';
	foreach($pings as $domain => $val){
		$out .= "<br />";
		$url = $val . $sm;
		$reply = & get_content( $url , true );
		if( !$reply ){
			$out .= "<font color=\"red\">Failed to ping <strong><a href=\"{$val}{$sm}\" >{$domain}</a></strong></font><br />";
			continue;
		}
		#flatten the answer: whitespace, scripts and tags become single spaces
		$answer = preg_replace("#\n+|\s+|<script [^>]+>[^>]+</script>|<[^>]*>#is",' ',chop($reply['content']));
		$out .= "Pinged <a href=\"{$val}{$sm}\" ><strong>{$domain}</strong></a> said: <br />" . $answer . "<br />";
	}
	_mail('Ping results', $out);

	echo $out;
}

/*
 Appends a "Sitemap :" line to robots.txt in the document root unless one is
 already there, then prints the resulting file content.
 Returns false (through _error) when robots.txt cannot be created or touched.
*/
function robots(){
	$path = $_SERVER['DOCUMENT_ROOT'].'/robots.txt';
	if( !@touch($path) )
		return _error( '<strong>'.$path.'</strong> does not exist or is not writtable, please chmod 666', 'WARNING');

	$body = file_get_contents($path);
	$link = "<a href=\"http://".SMC_SITE."/robots.txt\">robots.txt</a>";

	if( strpos($body, 'Sitemap :') === false ){
		$body .= "\nSitemap : http://".SMC_SITE."/sitemap.php?do=showsitemap&sm=sitemap.xml.gz\n";
		if( file_put_contents($path, $body ,LOCK_EX) )
			 echo $link . " edited successfully<br />";
	}else{
		echo $link . " already contains reference to sitemap<br />";
	}
	_echo($body);
}
/*
 Access gate. Returns when no password is configured or the request is
 authenticated by one of:
   - $_POST['pass'] equal to SMC_PASS
   - $_COOKIE['smc_pass'] equal to md5(SMC_PASS)
   - $_GET['secure'] equal to md5(SMC_PASS) together with do=createsitemap (cron URL)
 Otherwise prints the login form and terminates the script.
*/
function login(){
	if(!SMC_PASS)
		return;
	$pass   = (string)SMC_PASS;
	$digest = md5($pass);
	#strict string comparison: with == numeric-looking strings such as "0e123" and "0e456" compare equal
	$by_form   = isset($_POST['pass']) && is_string($_POST['pass']) && $_POST['pass'] === $pass;
	$by_cookie = isset($_COOKIE['smc_pass']) && is_string($_COOKIE['smc_pass']) && $_COOKIE['smc_pass'] === $digest;
	$by_cron   = isset($_GET['secure']) && is_string($_GET['secure']) && $_GET['secure'] === $digest
				 && isset($_GET['do']) && $_GET['do'] == 'createsitemap';
	if( $by_form || $by_cookie || $by_cron )
		return;
		
	echo '<br /><div align="center">';
	if(isset($_POST['pass'])) echo '<font color="red">Wrong Password, please check on the script config file</font><br />';
	#the version is concatenated; a PHP tag inside a string literal would be printed verbatim
	echo '<h2>Login</h2><form method="post" action="'.SMC_SELF.'"><input type="password"  name="pass" size="15"/><input type="submit" value="Login" /></form></div><div class="cp"><hr /><a href="'.SMC_SM_URL.'" >Sitemap Creator '.SMC_VERSION.'</a> By <a href="'.SMC_URL.'">GadElKareem</a></div>
</body></html>';
	exit;
}
/*
 Mails a plain-text notification to SMC_EMAIL (no-op when it is not set).
 <br /> becomes a newline, any other tag is removed, and the client IP,
 date, user agent and requested location are appended to the message.
*/
function _mail($title,$msg){
	if(!SMC_EMAIL) return;

	$subject = '[SITEMAP]'.$title;

	$body = str_replace('<br />', "\n", $msg);
	$body = preg_replace('#<[^>]*>#s','',$body);
	$body .= "\nIP -: http://whois.domaintools.com/{$_SERVER['REMOTE_ADDR']}";
	$body .= "\nDate -: ".date(" h:i:s a ( l d  F Y )");
	$body .= "\nBot -: {$_SERVER['HTTP_USER_AGENT']}";
	$body .= "\nLocation -: http://{$_SERVER['HTTP_HOST']}{$_SERVER['REQUEST_URI']}";

	$headers = "From: sitemap.creator@".SMC_SITE
			 . "\r\nMIME-Version: 1.0 \r\nX-Mailer: Sitemap Creator Mailer\r\n";

	mail(SMC_EMAIL,$subject,$body,$headers);
}
?>