zoukankan      html  css  js  c++  java
  • Java: removing IDs that repeat across several CSV lists

    Requirement:

    /**
     * before:
     * file A1.csv {1,2,3,4,5}
     * file A2.csv {2,3,9,10,11}
     * file B1.csv {5,12,13,14,15}
     * file B2.csv {16,14,15,4,9,20,30}
     * A1.csv, A2.csv, B1.csv and B2.csv must not repeat any ID across files
     *
     * after:
     * file A1.csv {1,4}
     * file A2.csv {2,3,10,11}
     * file B1.csv {12,13}
     * file B2.csv {16,9,20,30}
     */
    

      

    tangxin@tangxin:~/csvrepeat$ ls
    A1.csv  A2.csv  B1.csv  B2.csv
    

      

    CSVUtilVersion2.java

    import lombok.extern.slf4j.Slf4j;
    import org.apache.commons.collections.CollectionUtils;
    import org.apache.commons.io.FileUtils;
    import org.apache.commons.io.LineIterator;
    import org.springframework.util.StringUtils;
    
    import java.io.*;
    import java.lang.reflect.Array;
    import java.util.*;
    
    
    /**
     * before:
     * file A1.csv {1,2,3,4,5}
     * file A2.csv {2,3,9,10,11}
     * file B1.csv {5,12,13,14,15}
     * file B2.csv {16,14,15,4,9,20,30}
     * A1.csv A2.csv A3.csv A4.csv cant not repeat
     *
     * after:
     * file A1.csv {1,4}
     * file A2.csv {2,3,10,11}
     * file B1.csv {12,13}
     * file B2.csv {16,9,20,30}
     */
    @Slf4j
    public class CSVUtilVersion2 {
    
        private static final String CSV_PATH = "/home/tangxin/csvrepeat/";
        private static final boolean CREATE_SWITCH = true;
    
    
        /**
         * read single column data list
         * @param path
         * @return
         */
        public static List<String> ids(String path) {
            List<String> result = new ArrayList<>();
            File csv = new File(path);  // CSV文件路径
            LineIterator it = null;
            try {
                it = FileUtils.lineIterator(csv);
                while (it.hasNext()) {
                    String line = it.nextLine();
                    if (line.trim().contains("ID")) {
                        continue;
                    }
                    String[] arr = line.split(",");
                    String ID = arr[0];
                    ID = ID.replaceAll(""", "").trim();
                    if (!StringUtils.isEmpty(ID)) {
                        result.add(ID);
                    }
                }
            } catch (Exception e) {
                log.error("读取ID csv文件失败:{}", e.getMessage());
            } finally {
                LineIterator.closeQuietly(it);
            }
            return result;
        }
    
    
        /**
         * from src delete oth
         * @param src
         * @param oth
         * @return
         */
        public static List removeAll(List src, List oth) {
            LinkedList result = new LinkedList(src);
            HashSet othHash = new HashSet(oth);
            Iterator iter = result.iterator();
            while (iter.hasNext()) {
                if (othHash.contains(iter.next())) {
                    iter.remove();
                }
            }
            return result;
        }
    
    
        /**
         * -Xms1g -Xmx1g -XX:PermSize=128m -XX:SurvivorRatio=2 -XX:+UseParallelGC
         * @param args
         * @throws Exception
         */
        public static void main(String[] args) throws Exception {
    
    
            //∑=1+2+3+...+(n-1) group
    
            LinkedList<String> fileList = new LinkedList<>();
            fileList.add("A1.csv");
            fileList.add("A2.csv");
            fileList.add("B1.csv");
            fileList.add("B2.csv");
    //        fileList.add("C1.csv");
    
    
            DescartesRepeat(fileList);
    
            ded(fileList);
    
        }
    
        private static void DescartesRepeat(LinkedList<String> fileList) {
            Set<String> repeatList = new HashSet<>();
    
            Set<String> groupSet = new HashSet<>();
    
            Set<String> goONList = new HashSet<>();
    
    
            //A1->A2,B1,B2
            for (int i = 0; i < fileList.size(); i++) {
    
                String itemI = fileList.get(i);
    
                for (int j = 0; j < fileList.size(); j++) {
    
                    String itemJ = fileList.get(j);
    
                    if (!itemI.equals(itemJ)) {
    
                        String groupR1 = itemI + "->" + itemJ;
                        String groupR2 = itemJ + "->" + itemI;
    
                        if (groupSet.contains(groupR1) || groupSet.contains(groupR2)){
                            continue;
                        }
    
                        groupSet.add(groupR1);
    
    
                        String repeatT = repeat(CSV_PATH + itemI, CSV_PATH + itemJ);
                        if(!StringUtils.isEmpty(repeatT)){
                            repeatList.add(repeatT);
                            //System.out.println(groupR1+"->"+repeatT);
                        }
    
    
                    }
    
                }
            }
    
            if (CollectionUtils.isNotEmpty(repeatList)) {
    //            System.out.println(repeatList);
                for (String repeatItem : repeatList) {
                    Iterator<String> iterator = fileList.iterator();
                    while (iterator.hasNext()) {
                        String oldItem = iterator.next();
    
                        String oldS = oldItem.replace(".csv", "").replace("-new","");
                        String repeatS = repeatItem.replace(".csv","").replace("-new","");
                        if (repeatS.contains(oldS)) {
                            iterator.remove();
                            goONList.add(repeatItem);
                        }
                    }
                }
                fileList.addAll(goONList);
                System.out.println(fileList);
                DescartesRepeat(fileList);
            }
        }
    
    
        public static void ded(List<String> args) {
    
            //保证指定csv列表每组都不能有重复数据
            for (int i = 0; i < args.size(); i++) {
    //            if(i>0){
    //                continue;
    //            }
    
                String source = CSV_PATH + args.get(i);
    
                for (int j = 0; j < args.size(); j++) {
    
                    if (i == j) {
                        continue;
                    }
    
                    String target = CSV_PATH + args.get(j);
                    intersection(source, target);
                }
    
    
            }
    
    
        }
    
    
        public static void intersection(String sourcePath, String targetPath) {
            List<String> ids1 = ids(sourcePath);
            List<String> ids2 = ids(targetPath);
            List<String> inter = (List<String>) CollectionUtils.intersection(ids1, ids2);
            System.out.println(sourcePath + "和" + targetPath + "的重复数据大小" + inter.size());
        }
    
    
    
        public static String repeat(String source, String target){
            //cdd fund xyd
    
            List<String> ids1 = ids(source);
            List<String> ids2 = ids(target);
    
    //        System.out.println(source + "集合大小" + ids1.size());
    //        System.out.println(target + "集合大小" + ids2.size());
    
    
            List<String> inter = (List<String>) CollectionUtils.intersection(ids1, ids2);
    
    //        System.out.println("去重数据大小:" + inter.size());
    
    
    
            if (inter != null && inter.size() > 0) {
    
    
                if (ids1.size() > ids2.size()) {
                    return repeatInner(source, ids1, inter);
                } else if (ids2.size() > ids1.size()) {
                    return repeatInner(target, ids2, inter);
                } else {
                    return repeatInner(source, ids1, inter);
                }
    
    
            }
    
            return "";
        }
    
        private static String repeatInner(String source, List<String> ids, List<String> inter) {
            String newPath = source.replace(".csv", "-new.csv");
            List<String> ids1new = removeAll(ids, inter);
            createCSV(ids1new, newPath);
            return newPath.replace(CSV_PATH,"");
        }
    
    
    
        /**
         * 创建CSV文件
         */
        public static void createCSV(List<String> list, String fileName) {
    
    
            if(!CREATE_SWITCH){
    //            System.out.println("创建csv开关关闭");
                return;
            }else{
    //            System.out.println("创建csv开关开启");
            }
    
            // 表格头
            Object[] head = {"ID"};
            List<Object> headList = Arrays.asList(head);
    
            //数据
            List<List<Object>> dataList = new ArrayList<>();
            List<Object> rowList = null;
            for (int i = 0; i < list.size(); i++) {
                rowList = new ArrayList<>();
                rowList.add(list.get(i));
                dataList.add(rowList);
            }
    
            File csvFile;
            BufferedWriter csvWtriter = null;
            try {
                csvFile = new File(fileName);
                File parent = csvFile.getParentFile();
                if (parent != null && !parent.exists()) {
                    parent.mkdirs();
                }
                csvFile.createNewFile();
    
                // GB2312使正确读取分隔符","
                csvWtriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile), "GB2312"), 1024);
    
    
                // 写入文件头部
                writeRow(headList, csvWtriter);
    
                // 写入文件内容
                for (List<Object> row : dataList) {
                    writeRow(row, csvWtriter);
                }
                csvWtriter.flush();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                try {
                    csvWtriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    
    
        /**
         * 写一行数据
         *
         * @param row       数据列表
         * @param csvWriter
         * @throws IOException
         */
        private static void writeRow(List<Object> row, BufferedWriter csvWriter) throws IOException {
            for (Object data : row) {
                StringBuffer sb = new StringBuffer();
                String rowStr = sb.append(""").append(data).append("",").toString();
                csvWriter.write(rowStr);
            }
            csvWriter.newLine();
        }
    
    }
    

      

    tangxin@tangxin:~/csvrepeat$ ls
    A1.csv  A1-new.csv  A1-new-new.csv  A2.csv  A2-new.csv  B1.csv  B2.csv  B2-new.csv  B2-new-new.csv
    

      

  • 相关阅读:
    HTML元素解释
    Java命名规范
    HDU 1058 Humble Numbers(DP,数)
    HDU 2845 Beans(DP,最大不连续和)
    HDU 2830 Matrix Swapping II (DP,最大全1矩阵)
    HDU 2870 Largest Submatrix(DP)
    HDU 1421 搬寝室(DP)
    HDU 2844 Coins (组合背包)
    HDU 2577 How to Type(模拟)
    HDU 2159 FATE(二维完全背包)
  • 原文地址:https://www.cnblogs.com/fofawubian/p/8379266.html
Copyright © 2011-2022 走看看