check = $ignore_list; } //set a proxy host and port (such as someproxy:8080 or 10.1.1.1:8080 public function set_proxy($host_port){ $this->proxy = $host_port; } //validating urls using list of substrings private function validate($url){ $valid = true; //add substrings of url that you don't want to appear using set_ignore() method foreach($this->check as $val) { if(stripos($url, $val) !== false) { $valid = false; break; } } return $valid; } //multi curl requests private function multi_curl($urls){ // for curl handlers $curl_handlers = array(); //setting curl handlers foreach ($urls as $url) { $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $url); curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); if (isset($this->proxy) && !$this->proxy == '') { curl_setopt($curl, CURLOPT_PROXY, $this->proxy); } $curl_handlers[] = $curl; } //initiating multi handler $multi_curl_handler = curl_multi_init(); // adding all the single handler to a multi handler foreach($curl_handlers as $key => $curl) { curl_multi_add_handle($multi_curl_handler,$curl); } // executing the multi handler do { $multi_curl = curl_multi_exec($multi_curl_handler, $active); } while ($multi_curl == CURLM_CALL_MULTI_PERFORM || $active); foreach($curl_handlers as $curl) { //checking for errors if(curl_errno($curl) == CURLE_OK) { //if no error then getting content $content = curl_multi_getcontent($curl); //parsing content $this->parse_content($content); } } curl_multi_close($multi_curl_handler); return true; } //function to call public function get_links($domain){ //getting base of domain url address $this->base = str_replace("http://", "", $domain); $this->base = str_replace("https://", "", $this->base); $host = explode("/", $this->base); $this->base = $host[0]; //getting proper domain name and protocol $this->domain = trim($domain); if(strpos($this->domain, "http") !== 0) { $this->protocol = "http://"; $this->domain = $this->protocol.$this->domain; } else { $protocol = explode("//", $domain); $this->protocol = $protocol[0]."//"; } if(!in_array($this->domain, $this->sitemap_urls)) { $this->sitemap_urls[] = $this->domain; } //requesting link content using curl $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $this->domain); if (isset($this->proxy) && !$this->proxy == '') { curl_setopt($curl, CURLOPT_PROXY, $this->proxy); } curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); $page = curl_exec($curl); curl_close($curl); $this->parse_content($page); } //parses content and checks for URLs private function parse_content($page){ //getting all links from href attributes preg_match_all("/]*href\s*=\s*'([^']*)'|". ']*href\s*=\s*"([^"]*)"'."/is", $page, $match); //storing new links $new_links = array(); for($i = 1; $i < sizeof($match); $i++) { //walking through links foreach($match[$i] as $url) { //if doesn't start with http and is not empty if(strpos($url, "http") === false && trim($url) !== "") { //checking if absolute path if($url[0] == "/") $url = substr($url, 1); //checking if relative path else if($url[0] == ".") { while($url[0] != "/") { $url = substr($url, 1); } $url = substr($url, 1); } //transforming to absolute url $url = $this->protocol.$this->base."/".$url; } //if new and not empty if(!in_array($url, $this->sitemap_urls) && trim($url) !== "") { //if valid url if($this->validate($url)) { //checking if it is url from our domain if(strpos($url, "http://".$this->base) === 0 || strpos($url, "https://".$this->base) === 0) { //adding url to sitemap array $this->sitemap_urls[] = $url; //adding url to new link array $new_links[] = $url; } } } } } $this->multi_curl($new_links); return true; } //returns array of sitemap URLs public function get_array(){ return $this->sitemap_urls; } //notifies services like google, bing, yahoo, ask and moreover about your site map update public function ping($sitemap_url, $title ="", $siteurl = ""){ // for curl handlers $curl_handlers = array(); $sitemap_url = trim($sitemap_url); if(strpos($sitemap_url, "http") !== 0) { $sitemap_url = "http://".$sitemap_url; } $site = explode("//", $sitemap_url); $start = $site[0]; $site = explode("/", $site[1]); $middle = $site[0]; if(trim($title) == "") { $title = $middle; } if(trim($siteurl) == "") { $siteurl = $start."//".$middle; } //urls to ping $urls[0] = "http://www.google.com/webmasters/tools/ping?sitemap=".urlencode($sitemap_url); $urls[1] = "http://www.bing.com/webmaster/ping.aspx?siteMap=".urlencode($sitemap_url); $urls[2] = "http://search.yahooapis.com/SiteExplorerService/V1/updateNotification". "?appid=YahooDemo&url=".urlencode($sitemap_url); $urls[3] = "http://submissions.ask.com/ping?sitemap=".urlencode($sitemap_url); $urls[4] = "http://rpc.weblogs.com/pingSiteForm?name=".urlencode($title). "&url=".urlencode($siteurl)."&changesURL=".urlencode($sitemap_url); //setting curl handlers foreach ($urls as $url) { $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $url); curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); curl_setopt($curl, CURL_HTTP_VERSION_1_1, 1); $curl_handlers[] = $curl; } //initiating multi handler $multi_curl_handler = curl_multi_init(); // adding all the single handler to a multi handler foreach($curl_handlers as $key => $curl) { curl_multi_add_handle($multi_curl_handler,$curl); } // executing the multi handler do { $multi_curl = curl_multi_exec($multi_curl_handler, $active); } while ($multi_curl == CURLM_CALL_MULTI_PERFORM || $active); // check if there any error $submitted = true; foreach($curl_handlers as $key => $curl) { //you may use curl_multi_getcontent($curl); for getting content //and curl_error($curl); for getting errors if(curl_errno($curl) != CURLE_OK) { $submitted = false; } } curl_multi_close($multi_curl_handler); return $submitted; } //generates sitemap public function generate_sitemap(){ $sitemap = new SimpleXMLElement(''); foreach($this->sitemap_urls as $url) { $url_tag = $sitemap->addChild("url"); $url_tag->addChild("loc", htmlspecialchars($url)); } return $sitemap->asXML(); } } ?>