zoukankan      html  css  js  c++  java
  • Java: making CSV ID lists mutually non-repeating

    Requirement:

    /**
     * before:
     * file A1.csv {1,2,3,4,5}
     * file A2.csv {2,3,9,10,11}
     * file B1.csv {5,12,13,14,15}
     * file B2.csv {16,14,15,4,9,20,30}
     * A1.csv, A2.csv, B1.csv and B2.csv must not share any value
     *
     * after:
     * file A1.csv {1,4}
     * file A2.csv {2,3,10,11}
     * file B1.csv {12,13}
     * file B2.csv {16,9,20,30}
     */
    

      

    tangxin@tangxin:~/csvrepeat$ ls
    A1.csv  A2.csv  B1.csv  B2.csv
    

      

    CSVUtilVersion2.java

    import lombok.extern.slf4j.Slf4j;
    import org.apache.commons.collections.CollectionUtils;
    import org.apache.commons.io.FileUtils;
    import org.apache.commons.io.LineIterator;
    import org.springframework.util.StringUtils;
    
    import java.io.*;
    import java.lang.reflect.Array;
    import java.util.*;
    
    
    /**
     * before:
     * file A1.csv {1,2,3,4,5}
     * file A2.csv {2,3,9,10,11}
     * file B1.csv {5,12,13,14,15}
     * file B2.csv {16,14,15,4,9,20,30}
     * A1.csv, A2.csv, B1.csv and B2.csv must not share any value
     *
     * after:
     * file A1.csv {1,4}
     * file A2.csv {2,3,10,11}
     * file B1.csv {12,13}
     * file B2.csv {16,9,20,30}
     */
    @Slf4j
    public class CSVUtilVersion2 {
    
        private static final String CSV_PATH = "/home/tangxin/csvrepeat/";
        private static final boolean CREATE_SWITCH = true;
    
    
        /**
         * read single column data list
         * @param path
         * @return
         */
        public static List<String> ids(String path) {
            List<String> result = new ArrayList<>();
            File csv = new File(path);  // CSV文件路径
            LineIterator it = null;
            try {
                it = FileUtils.lineIterator(csv);
                while (it.hasNext()) {
                    String line = it.nextLine();
                    if (line.trim().contains("ID")) {
                        continue;
                    }
                    String[] arr = line.split(",");
                    String ID = arr[0];
                    ID = ID.replaceAll(""", "").trim();
                    if (!StringUtils.isEmpty(ID)) {
                        result.add(ID);
                    }
                }
            } catch (Exception e) {
                log.error("读取ID csv文件失败:{}", e.getMessage());
            } finally {
                LineIterator.closeQuietly(it);
            }
            return result;
        }
    
    
        /**
         * Returns a new list holding the elements of {@code src} that do not occur in
         * {@code oth}, in their original order. Neither argument is modified.
         *
         * <p>Membership is tested against a hash set built from {@code oth}, so the
         * elements must have consistent {@code equals}/{@code hashCode}.
         *
         * @param src list to filter
         * @param oth elements to exclude
         * @param <T> element type of {@code src}
         * @return a new mutable list with the remaining elements
         * @throws NullPointerException if either argument is {@code null}
         */
        public static <T> List<T> removeAll(List<T> src, List<?> oth) {
            Set<Object> excluded = new HashSet<Object>(oth);
            List<T> result = new ArrayList<>(src.size());
            for (T item : src) {
                if (!excluded.contains(item)) {
                    result.add(item);
                }
            }
            return result;
        }
    
    
        /**
         * Entry point: builds the list of CSV file names, runs
         * {@code DescartesRepeat} on it and then hands the same list to {@code ded}.
         *
         * <p>JVM options noted by the author:
         * -Xms1g -Xmx1g -XX:PermSize=128m -XX:SurvivorRatio=2 -XX:+UseParallelGC
         *
         * @param args unused
         * @throws Exception declared for callers; nothing in this method throws a checked exception itself
         */
        public static void main(String[] args) throws Exception {
            // n files give 1+2+...+(n-1) pairwise groups to compare.
            // A fifth file, "C1.csv", is currently left out of the run.
            LinkedList<String> csvNames = new LinkedList<>(
                    Arrays.asList("A1.csv", "A2.csv", "B1.csv", "B2.csv"));

            DescartesRepeat(csvNames);
            ded(csvNames);
        }
    
        /**
         * Compares every unordered pair of distinct names in {@code fileList} via
         * {@code repeat} and, while any pair still produces a trimmed file, swaps the
         * affected entries of {@code fileList} for the produced names and recurses.
         *
         * <p>The list is modified in place. The recursion ends on the first pass in
         * which no call to {@code repeat} returns a non-empty name.
         *
         * @param fileList file names relative to {@code CSV_PATH}; mutated by this method
         */
        private static void DescartesRepeat(LinkedList<String> fileList) {
            // names returned by repeat() during this pass (non-empty results only)
            Set<String> repeatList = new HashSet<>();

            // "X->Y" keys of the pairs already compared in this pass
            Set<String> groupSet = new HashSet<>();

            // produced names that matched an existing entry and will replace it
            Set<String> goONList = new HashSet<>();


            // e.g. A1 -> A2, B1, B2
            for (int i = 0; i < fileList.size(); i++) {

                String itemI = fileList.get(i);

                for (int j = 0; j < fileList.size(); j++) {

                    String itemJ = fileList.get(j);

                    if (!itemI.equals(itemJ)) {

                        String groupR1 = itemI + "->" + itemJ;
                        String groupR2 = itemJ + "->" + itemI;

                        // a pair is compared once, whichever direction was seen first
                        if (groupSet.contains(groupR1) || groupSet.contains(groupR2)){
                            continue;
                        }

                        groupSet.add(groupR1);


                        // repeat() returns "" when the two files share nothing
                        String repeatT = repeat(CSV_PATH + itemI, CSV_PATH + itemJ);
                        if(!StringUtils.isEmpty(repeatT)){
                            repeatList.add(repeatT);
                            //System.out.println(groupR1+"->"+repeatT);
                        }


                    }

                }
            }

            if (CollectionUtils.isNotEmpty(repeatList)) {
    //            System.out.println(repeatList);
                for (String repeatItem : repeatList) {
                    Iterator<String> iterator = fileList.iterator();
                    while (iterator.hasNext()) {
                        String oldItem = iterator.next();

                        // compare stems with ".csv" and every "-new" suffix stripped
                        String oldS = oldItem.replace(".csv", "").replace("-new","");
                        String repeatS = repeatItem.replace(".csv","").replace("-new","");
                        // NOTE(review): substring match, so stem "A1" would also match "A10" - confirm file naming
                        if (repeatS.contains(oldS)) {
                            iterator.remove();
                            goONList.add(repeatItem);
                        }
                    }
                }
                // replace the removed entries with the produced names, then run another pass
                fileList.addAll(goONList);
                System.out.println(fileList);
                DescartesRepeat(fileList);
            }
        }
    
    
        /**
         * Calls {@code intersection} for every ordered pair of entries at different
         * positions in {@code args}, so the overlap of each pair is printed. Intended
         * as a check that no two CSV files in the list still share data.
         *
         * @param args file names relative to {@code CSV_PATH}
         */
        public static void ded(List<String> args) {
            for (int outer = 0; outer < args.size(); outer++) {
                String source = CSV_PATH + args.get(outer);

                for (int inner = 0; inner < args.size(); inner++) {
                    // a file is never compared with itself
                    if (inner != outer) {
                        intersection(source, CSV_PATH + args.get(inner));
                    }
                }
            }
        }
    
    
        /**
         * Reads the IDs of both files and prints the size of their intersection to
         * standard output.
         *
         * @param sourcePath path of the first CSV file
         * @param targetPath path of the second CSV file
         */
        public static void intersection(String sourcePath, String targetPath) {
            List<String> sourceIds = ids(sourcePath);
            List<String> targetIds = ids(targetPath);
            List<String> shared = (List<String>) CollectionUtils.intersection(sourceIds, targetIds);

            StringBuilder report = new StringBuilder();
            report.append(sourcePath)
                    .append("和")
                    .append(targetPath)
                    .append("的重复数据大小")
                    .append(shared.size());
            System.out.println(report.toString());
        }
    
    
    
        /**
         * Finds the IDs shared by two CSV files and, when there are any, writes a
         * trimmed copy of one of the files without them.
         *
         * <p>The file with strictly more IDs is the one trimmed; when both have the
         * same number of IDs, {@code source} is trimmed.
         *
         * @param source path of the first CSV file
         * @param target path of the second CSV file
         * @return the value returned by {@code repeatInner} for the trimmed file, or
         *         an empty string when the files share no IDs
         */
        public static String repeat(String source, String target) {
            List<String> sourceIds = ids(source);
            List<String> targetIds = ids(target);
            List<String> shared = (List<String>) CollectionUtils.intersection(sourceIds, targetIds);

            // nothing in common: no file is produced
            if (shared == null || shared.isEmpty()) {
                return "";
            }

            // only a strictly larger target is trimmed; ties fall through to source
            if (targetIds.size() > sourceIds.size()) {
                return repeatInner(target, targetIds, shared);
            }
            return repeatInner(source, sourceIds, shared);
        }
    
        /**
         * Writes {@code ids} minus {@code inter} to a sibling file whose name has
         * {@code .csv} replaced by {@code -new.csv}.
         *
         * @param source path of the file the IDs were read from
         * @param ids    IDs read from {@code source}
         * @param inter  IDs to leave out
         * @return the new file's path with {@code CSV_PATH} removed
         */
        private static String repeatInner(String source, List<String> ids, List<String> inter) {
            List<String> remaining = removeAll(ids, inter);
            String trimmedPath = source.replace(".csv", "-new.csv");
            createCSV(remaining, trimmedPath);
            return trimmedPath.replace(CSV_PATH, "");
        }
    
    
    
        /**
         * Writes the given values to a single-column CSV file under an {@code ID}
         * header row, overwriting any existing file.
         *
         * <p>Does nothing when {@code CREATE_SWITCH} is off. Missing parent
         * directories are created first. I/O failures are printed to standard error
         * and otherwise ignored, so the caller is not interrupted.
         *
         * @param list     values to write, one per row
         * @param fileName path of the CSV file to write
         */
        public static void createCSV(List<String> list, String fileName) {
            if (!CREATE_SWITCH) {
                return;
            }

            File csvFile = new File(fileName);
            File parent = csvFile.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }

            // try-with-resources closes the writer even when opening or writing fails;
            // the previous finally block called close() on a null writer if the stream
            // could not be opened, which hid the real error behind an NPE.
            // GB2312 is kept from the original, whose note says it lets the ","
            // separator be read correctly.
            try (BufferedWriter csvWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(csvFile), "GB2312"), 1024)) {
                // header row
                writeRow(Collections.<Object>singletonList("ID"), csvWriter);

                // one row per value
                for (String id : list) {
                    writeRow(Collections.<Object>singletonList(id), csvWriter);
                }
                csvWriter.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    
    
        /**
         * 写一行数据
         *
         * @param row       数据列表
         * @param csvWriter
         * @throws IOException
         */
        private static void writeRow(List<Object> row, BufferedWriter csvWriter) throws IOException {
            for (Object data : row) {
                StringBuffer sb = new StringBuffer();
                String rowStr = sb.append(""").append(data).append("",").toString();
                csvWriter.write(rowStr);
            }
            csvWriter.newLine();
        }
    
    }
    

      

    tangxin@tangxin:~/csvrepeat$ ls
    A1.csv  A1-new.csv  A1-new-new.csv  A2.csv  A2-new.csv  B1.csv  B2.csv  B2-new.csv  B2-new-new.csv
    

      

  • 相关阅读:
    golang不想http自动处理重定向的解决方案
    学习WebDav
    keepass+坚果云管理我的密码
    定制右键功能,看这一篇就够了
    翻转二叉树
    加密sqlite3数据库文件
    算出cron表达式接下来几次执行时间
    关于斐波那契数列的3种解法
    golang通过cgo调用lua
    学习go语言并完成第一个作品
  • 原文地址:https://www.cnblogs.com/fofawubian/p/8379266.html
Copyright © 2011-2022 走看看