# 1. 查找最新插入的记录（按 id 倒序取第一条）
# Return the single row of spider_test with the highest id.
# NOTE(review): this is "the latest inserted row" only if id grows with
# insertion order (e.g. AUTO_INCREMENT) — confirm against the table definition.
SELECT *
FROM `spider_test`
ORDER BY id DESC
LIMIT 1;
# 2. 规范化记录中不规则的 url
# url 字段的规范形式：http://www.che168.com/dealer/99794/22067444.html
# 不规则形式（.html 后面带有多余字符）：http://www.che168.com/dealer/75923/17959216.html#pvareaid=100496
# 将 .html 后面的字符删除，并把结果更新到表中
# Truncate url right after its first '.html', dropping any trailing fragment
# such as '#pvareaid=100496'.
#   LOCATE('.html', url)  -> 1-based position of the first '.html' (0 if absent)
#   LEFT(url, pos - 1)    -> everything before that '.html'
#   CONCAT(..., '.html')  -> re-append the extension
# The WHERE clause skips rows that contain no '.html' at all, so they are
# never rewritten. Rows that already end in '.html' are rewritten to the same
# value, i.e. effectively unchanged.
UPDATE `spider_test`
SET url = CONCAT(LEFT(url, LOCATE('.html', url) - 1), '.html')
WHERE LOCATE('.html', url) > 0;
# 3. 查找并删除 url 重复的记录（每个 url 只保留 id 最小的一条）
# Find duplicates: list every row whose id is not the smallest id of its url
# group, i.e. the rows the DELETE below would remove.
# NOTE(review): GROUP BY puts all NULL urls into one group, so rows with a NULL
# url are treated as duplicates of each other — confirm that is intended.
SELECT *
FROM `spider_test`
WHERE id NOT IN (
    SELECT MIN(id)
    FROM `spider_test`
    GROUP BY url
);
# Delete duplicates: for each distinct url keep the row with the smallest id
# and remove every other row.
# The inner aggregate is wrapped in a derived table (keepers) rather than used
# directly in NOT IN; presumably this works around MySQL refusing to read, in a
# subquery, the same table the DELETE targets (error 1093) — confirm on the
# server version in use.
DELETE FROM `spider_test`
WHERE id NOT IN (
    SELECT keep_id
    FROM (
        SELECT MIN(id) AS keep_id
        FROM `spider_test`
        GROUP BY url
    ) keepers
);