邮件系统日志中含有数百万条邮件记录，每条记录的格式如下：aa;jack349392900@gmail.com; bb;cc;dd（各字段以分号分隔，第二个字段为邮箱地址）。将日志从数据库导出为文本文件后，需要逐行提取其中的邮箱地址并另存为文件。
程序如下:
<?php

/**
 * Read a mail log line by line, extract the email address stored between the
 * first and second semicolon of each record, and save every address that
 * passes validation to a result file (one address per line).
 * ----------------------------------------------------------------------------
 * Jacklee's blog, dedicated to PHP technology
 * http://www.phpally.com
 * ----------------------------------------------------------------------------
 * @author: Jacklee
 * @email: jack349392900#gmail.com
 * @date: 2012-02-27
 */

$sourceFile = fopen("source/emails_17.txt", "r") or exit("打开文件失败!");
// The output handle is checked the same way as the input handle: writing to a
// handle that failed to open would otherwise fail on every record.
$resultFile = fopen("results/emails_17.txt", "w") or exit("打开文件失败!");

// fgets() returns false at end of file (or on a read error). Testing its
// return value avoids the feof()-driven loop, which runs one extra iteration
// with a false "line" after the last record has been read.
while (($line = fgets($sourceFile)) !== false) {
    $candidate = getEmail($line);

    if (validateEmail($candidate)) {
        // $candidate already ends with "\n", so each address gets its own line.
        fwrite($resultFile, $candidate);
    }
}

fclose($sourceFile);
fclose($resultFile);

/**
 * Extract the second semicolon-separated field of a log record.
 *
 * @param string $str One record, e.g. "aa;user@example.com; bb;cc;dd".
 *
 * @return string The second field with surrounding whitespace removed,
 *                followed by "\n". When the record has no second field
 *                (no semicolon in the line) the result is just "\n".
 */
function getEmail($str)
{
    $fields = explode(";", $str);

    // Guard the index: a line without any semicolon yields a one-element array.
    $email = isset($fields[1]) ? trim($fields[1]) : "";

    return $email . "\n";
}

/**
 * Check whether a string looks like an email address.
 *
 * Accepted form: a local part made of lowercase letters, digits, "_", "."
 * and "-", an "@", one or more dot-terminated domain labels of at least two
 * characters, and a top-level domain of 2 to 4 lowercase letters.
 *
 * The pattern is anchored with "$" and no D modifier, so a single trailing
 * newline is tolerated; strings returned by getEmail() rely on that.
 *
 * @param string $str Candidate address, optionally terminated by "\n".
 *
 * @return bool True when the candidate matches the pattern, false otherwise.
 */
function validateEmail($str)
{
    // The dot after each domain label is escaped so that it only matches a
    // literal "."; unescaped it matches any character and lets strings such
    // as "foo@bar!com" through.
    $pattern = '/^[_.0-9a-z-]+@([0-9a-z][0-9a-z-]+\.)+[a-z]{2,4}$/';

    return preg_match($pattern, $str) === 1;
}