代码如下:
def crawlAndGet(keyword, n): if n>0: jsondata = os.system('scrapy crawl beijing -a store_dir="./webserver/img" -a keyword="'+keyword+'"') if jsondata is not None and jsondata != 0: return jsondata else: return crawlAndGet(keyword,n-1) if n<=0: return [{'data':'null'}] def checkCompany(keyword = None): conn = MySQLdb.connect(host="localhost", user="root", passwd="", db='qixin', port=3306, charset="utf8") cursor = conn.cursor() sql="select identify_id,name,ctype,owner,capital,date_found,address,date_from,date_to,scope,reg_auth,date_check,reg_status from qx_company where name = '"+keyword+"'" cursor.execute(sql) row = cursor.fetchone() if row is None: return crawlAndGet(keyword, 10) else: obj = [{'identify_id': row[0].encode("utf-8")},{'name': row[1].encode("utf-8")}, {'ctype': row[2].encode("utf-8")}, {'owner': row[3].encode("utf-8")},
{'capital': row[4].encode("utf-8")}, {'date_found': row[5].encode("utf-8")}, {'address': row[6].encode("utf-8")},
{'date_from': row[7].encode("utf-8")}, {'date_to': row[8].encode("utf-8")}, {'scope': row[9].encode("utf-8")}, {'reg_auth': row[10].encode("utf-8")},
{'date_check': row[11].encode("utf-8")}, {'reg_status': row[12].encode("utf-8")}] return json.dumps(obj) cursor.close() if __name__ == '__main__': print checkCompany('北京正元商贸有限公司')
在执行checkCompany里的递归函数crawlAndGet是,本以为jsondata有值的时候就会return,发现其实他并没有return直到n<=0时。原因其实时,return jsondata只是代表crawlAndGet(keyword, n-i)执行完了,还要继续往下走。
为了测试这个原理,修改成如下代码:
def crawlAndGet(keyword): jsondata = os.system('scrapy crawl beijing -a store_dir="./webserver/img" -a keyword="'+keyword+'"') if jsondata is not None and jsondata != 0: return jsondata else : return None def checkCompany(keyword = None): conn = MySQLdb.connect(host="localhost", user="root", passwd="", db='qixin', port=3306, charset="utf8") cursor = conn.cursor() sql="select identify_id,name,ctype,owner,capital,date_found,address,date_from,date_to,scope,reg_auth,date_check,reg_status from qx_company where name = '"+keyword+"'" cursor.execute(sql) row = cursor.fetchone() if row is None: data = crawlAndGet(keyword) if data is None: data1 = crawlAndGet(keyword) else: return data if data1 is None: data2 = crawlAndGet(keyword) else: return data1 if data2 is None: data3 = crawlAndGet(keyword) else: return data3 if data3 is None: data4 = crawlAndGet(keyword) else: return data4 else: obj = [{'identify_id': row[0].encode("utf-8")},{'name': row[1].encode("utf-8")}] return json.dumps(obj) cursor.close() if __name__ == '__main__': print checkCompany('北京正元商贸有限公司')
发现即使data1,data2有return数据了,他还是会直接执行到return data4之后才返回,奇怪。