zoukankan      html  css  js  c++  java
  • 大文件归并排序

    //大文件排序
    	/**
    	 * Count the lines in a text file.
    	 *
    	 * A final line without a trailing newline is counted as a line; an empty
    	 * file has 0 lines.
    	 *
    	 * @param string $path Path of the file to read.
    	 * @return int Number of lines in the file.
    	 * @throws RuntimeException If the file cannot be opened.
    	 */
    	function countsLines($path){
    		$fd = fopen($path,"r");
    		if ($fd === false) {
    			throw new RuntimeException("Unable to open file: " . $path);
    		}
    		$total = 0;
    		// Count successful reads instead of looping on feof(): feof() only turns
    		// true after a read has already hit end-of-file, so the old loop counted
    		// one phantom line for every newline-terminated (or empty) file.
    		while (fgets($fd) !== false) {
    			$total++;
    		}
    		fclose($fd);
    		return $total;
    	}
    	// Path of the large input file that the script splits, sorts and merges.
    	$filePath = "./file.dat";
    	/**
    	 * Split a text file into consecutive chunk files of at most $rows lines each.
    	 *
    	 * Chunks are written to the current working directory as "1.dat", "2.dat", ...
    	 * in input order. The input is read once, and a chunk file is only created
    	 * when there is at least one line to put in it, so no empty chunk is produced
    	 * (an empty input yields an empty list).
    	 *
    	 * @param string $path Path of the file to split.
    	 * @param int    $rows Maximum number of lines per chunk (must be >= 1).
    	 * @return array List of chunk file names, in creation order.
    	 * @throws InvalidArgumentException If $rows is smaller than 1.
    	 * @throws RuntimeException If the input or a chunk file cannot be opened.
    	 */
    	function checkFiles($path,$rows=5000){
    		$rows = (int) $rows;
    		if ($rows < 1) {
    			throw new InvalidArgumentException("rows must be at least 1");
    		}
    		$source = fopen($path, "r");
    		if ($source === false) {
    			throw new RuntimeException("Unable to open input file: " . $path);
    		}
    		$files = [];
    		$chunk = null;   // handle of the chunk currently being filled, if any
    		$written = 0;    // lines written to the current chunk
    		while (($line = fgets($source)) !== false) {
    			if ($chunk === null) {
    				// Open the next chunk lazily, only once a line is available for it.
    				$fileName = (count($files) + 1) . ".dat";
    				$chunk = fopen($fileName, "w");
    				if ($chunk === false) {
    					fclose($source);
    					throw new RuntimeException("Unable to create chunk file: " . $fileName);
    				}
    				$files[] = $fileName;
    				$written = 0;
    			}
    			fputs($chunk, $line);
    			$written++;
    			if ($written >= $rows) {
    				fclose($chunk);
    				$chunk = null;
    			}
    		}
    		if ($chunk !== null) {
    			fclose($chunk);
    		}
    		fclose($source);
    		return $files;
    	}
    	/**
    	 * Sort the lines of every given file in place.
    	 *
    	 * Each file is loaded fully into memory, every line is trimmed, the lines are
    	 * sorted with sort()'s default comparison, and the file is rewritten with one
    	 * value per line, each terminated by PHP_EOL.
    	 *
    	 * @param array $files Paths of the files to sort.
    	 * @return void
    	 * @throws RuntimeException If a file cannot be read or rewritten.
    	 */
    	function cSort($files){
    		foreach ($files as $file) {
    			// Check up front so a missing file fails with a clear error.
    			if (!is_file($file) || !is_readable($file)) {
    				throw new RuntimeException("Unable to read chunk file: " . $file);
    			}
    			$lines = file($file);
    			if ($lines === false) {
    				throw new RuntimeException("Unable to read chunk file: " . $file);
    			}
    			$lines = array_map('trim', $lines);
    			sort($lines);
    			// file_put_contents opens, writes and closes in one call, so no
    			// handle is left open between iterations.
    			$payload = $lines ? implode(PHP_EOL, $lines) . PHP_EOL : '';
    			if (file_put_contents($file, $payload) === false) {
    				throw new RuntimeException("Unable to write chunk file: " . $file);
    			}
    		}
    	}
    	//合并数组
    	/**
    	 * Merge already-sorted integer files pairwise until a single file remains.
    	 *
    	 * The first two files of the queue are merged into a new file named
    	 * "<first>-<second>.dat", which is put back at the front of the queue; this
    	 * repeats until one file is left. Every line is interpreted as an integer
    	 * (intval) and written back as that integer followed by PHP_EOL. Blank lines
    	 * and empty files contribute nothing to the output. Input and intermediate
    	 * files are left on disk.
    	 *
    	 * @param array $files Paths of the sorted files to merge.
    	 * @return string|null Name of the file holding the fully merged data (the
    	 *                     single input when only one was given), or null when
    	 *                     $files is empty.
    	 * @throws RuntimeException If an input or output file cannot be opened.
    	 */
    	function mergeAllData($files){
    		// Most filesystems limit a single file name to 255 bytes.
    		$maxNameLength = 255;

    		// Remove only a trailing ".dat", never one in the middle of the path.
    		$stripDat = function ($name) {
    			return substr($name, -4) === ".dat" ? substr($name, 0, -4) : $name;
    		};

    		// Return the next integer from $handle, skipping blank lines; null at EOF.
    		$readValue = function ($handle) {
    			while (($line = fgets($handle)) !== false) {
    				$line = trim($line);
    				if ($line !== "") {
    					return intval($line);
    				}
    			}
    			return null;
    		};

    		while (count($files) > 1) {
    			$fname1 = array_shift($files);
    			$fname2 = array_shift($files);

    			// Keep the directory of the first file and join the two base names.
    			$stem1 = $stripDat($fname1);
    			$dirPrefix = substr($stem1, 0, strlen($stem1) - strlen(basename($stem1)));
    			$baseName = basename($stem1) . "-" . $stripDat(basename($fname2)) . ".dat";
    			if (strlen($baseName) > $maxNameLength) {
    				// The joined name grows with every merge; fall back to a short,
    				// deterministic name once it would exceed the file name limit.
    				$baseName = "merged-" . md5($baseName) . ".dat";
    			}
    			$newFileName = $dirPrefix . $baseName;

    			$fd1 = fopen($fname1, "r");
    			$fd2 = fopen($fname2, "r");
    			$fd3 = fopen($newFileName, "w");
    			if ($fd1 === false || $fd2 === false || $fd3 === false) {
    				foreach ([$fd1, $fd2, $fd3] as $handle) {
    					if ($handle !== false) {
    						fclose($handle);
    					}
    				}
    				throw new RuntimeException("Unable to open files for merging " . $fname1 . " and " . $fname2);
    			}

    			$left = $readValue($fd1);
    			$right = $readValue($fd2);
    			// Standard two-way merge: always emit the smaller head value. On a
    			// tie the value from the second file goes first, as before.
    			while ($left !== null && $right !== null) {
    				if ($left < $right) {
    					fputs($fd3, $left . PHP_EOL);
    					$left = $readValue($fd1);
    				} else {
    					fputs($fd3, $right . PHP_EOL);
    					$right = $readValue($fd2);
    				}
    			}
    			// At most one of the inputs still has values; copy them over.
    			while ($left !== null) {
    				fputs($fd3, $left . PHP_EOL);
    				$left = $readValue($fd1);
    			}
    			while ($right !== null) {
    				fputs($fd3, $right . PHP_EOL);
    				$right = $readValue($fd2);
    			}

    			fclose($fd1);
    			fclose($fd2);
    			fclose($fd3);
    			array_unshift($files, $newFileName);
    		}

    		return count($files) === 1 ? reset($files) : null;
    	}
    	// Split the input into chunk files (checkFiles defaults to 5000 lines per chunk).
    	$files = checkFiles($filePath);
    	// Sort every chunk file in place, then merge the sorted chunks pairwise.
    	cSort($files);
    	mergeAllData($files);
    

      

  • 相关阅读:
    使用python实现深度神经网络 1(转)
    OHDSI——数据标准化
    TensorFlow相关
    语言模型和共现矩阵(转)
    cd4与cd8比值的意义
    python之使用set对列表去重,并保持列表原来顺序(转)
    Python 汉字转拼音
    Hadoop的启动和停止说明
    Scikit-learn 概述
    病历智能分析系统的研究与实现(转)
  • 原文地址:https://www.cnblogs.com/zh718594493/p/12089477.html
Copyright © 2011-2022 走看看