From 7f185e24ffbc8b4c171219cbf3d4d7e44ecf45aa Mon Sep 17 00:00:00 2001
From: HuaiZhong Liu <lhuaizhong@gmail.com>
Date: Thu, 26 Nov 2015 21:37:09 +0800
Subject: [PATCH] Send a browser User-Agent header and echo each URL opened

---
 sitespy.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sitespy.py b/sitespy.py
index 1768b75..9ee181e 100644
--- a/sitespy.py
+++ b/sitespy.py
@@ -13,6 +13,8 @@
 import sys
 from urlparse import urljoin
 
+user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'
+
 def scrape_tel(url, keys, col_site, html, identifier='office|tel|call|phone|T:|T', echo=False):
     tel = ''
     if tel == '' and ('.au' in keys or col_site.endswith('.au')): # in Australia
@@ -152,7 +154,9 @@ def sitespy(url, keys=[], echo=False):
         print 'website:%s' % col_site
 
     datas = {}
-    html = requests.get(url).text
+    if echo:
+        print 'Opening ' + url
+    html = requests.get(url, headers={'User-Agent':user_agent}).text
 
     #Initial keys.
     if 'email' in keys or 'siteemail' in keys:
@@ -173,7 +177,9 @@ def sitespy(url, keys=[], echo=False):
                     tmp = tree.xpath('//a[contains(*/text(),"contact") or contains(*/text(),"Contact") or contains(*/text(),"CONTACT")]/@href')
                 if tmp:
                     u = urljoin(url, tmp[0])
-                    html = requests.get(u).text
+                    if echo:
+                        print 'Opening ' + u
+                    html = requests.get(u, headers={'User-Agent':user_agent}).text
                 if echo :
                     print 'Scan contact page...'
             except Exception: