zoukankan      html  css  js  c++  java
  • 大文件文本数据处理

    1.切割与合并

     #define  _CRT_SECURE_NO_WARNINGS
    #include <stdio.h>
    #include <stdlib.h>
    /*
     * Input file to split and output file produced by merging the parts.
     * Backslashes in Windows paths must be written as "\\" inside a C string
     * literal; a single backslash starts an escape sequence and would corrupt
     * the path.
     */
    char *path = "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwang.txt";
    char *newpath = "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwangN.txt";
    /* Number of lines assumed to be in the input file; space() splits this many lines. */
    #define N 13180820
    
    /*
     * Count the lines in the text file at `path`.
     *
     * A line is a run of characters ended by '\n'; a final run that is not
     * newline-terminated also counts as a line. An empty file has 0 lines.
     *
     * Returns the line count, or -1 if the file cannot be opened.
     */
    int getN(char *path)
    {
        FILE *pf = fopen(path, "r");
        if (pf == NULL)
        {
            return -1;
        }

        int lines = 0;
        /* Treat "nothing read yet" like a terminated line so an empty file yields 0. */
        char last = '\n';
        char buf[4096];
        size_t got;

        /*
         * Count newline characters directly instead of looping on feof():
         * feof() only becomes true after a read has already failed, which
         * makes a feof-controlled fgets loop count one phantom line, and a
         * fixed-size fgets buffer counts an over-long line more than once.
         */
        while ((got = fread(buf, 1, sizeof buf, pf)) > 0)
        {
            for (size_t k = 0; k < got; k++)
            {
                if (buf[k] == '\n')
                {
                    lines++;
                }
            }
            last = buf[got - 1];
        }

        /* Unterminated final line. */
        if (last != '\n')
        {
            lines++;
        }

        fclose(pf);
        return lines;
    }
    
    /*
     * Copy up to `count` fgets-sized lines from `src` into a newly created
     * file at `dst_path`. Stops early if `src` runs out of data.
     * Returns 0 on success, -1 if the destination cannot be opened.
     */
    static int space_copy_lines(FILE *src, const char *dst_path, int count)
    {
        FILE *dst = fopen(dst_path, "w");
        if (dst == NULL)
        {
            perror(dst_path);
            return -1;
        }

        char str[1024];
        for (int j = 0; j < count; j++)
        {
            if (fgets(str, sizeof str, src) == NULL)
            {
                break; /* source exhausted (or read error): nothing more to copy */
            }
            fputs(str, dst);
        }

        fclose(dst);
        return 0;
    }

    /*
     * Split the file at `path` into `num` part files named
     * dangdangwang1.txt .. dangdangwang<num>.txt in the data directory.
     *
     * The file is assumed to contain N lines (file-level macro):
     *   - if N is divisible by num, every part receives N/num lines;
     *   - otherwise the first num-1 parts receive N/(num-1) lines each and
     *     the last part receives the remaining N%(num-1) lines
     *     (e.g. 100 lines into 9 parts -> 8 parts of 12 lines plus one of 4).
     *
     * Does nothing if `num` is not positive or the input cannot be opened.
     * Stops splitting if a part file cannot be created.
     */
    void space(char *path, int num)
    {
        if (num <= 0)
        {
            return; /* avoids division/modulo by zero below */
        }

        FILE *pf = fopen(path, "r");
        if (pf == NULL)
        {
            return;
        }

        int per_part;  /* lines for each of the leading parts */
        int last_part; /* lines for the final part */
        if (N % num == 0)
        {
            per_part = N / num;
            last_part = per_part;
        }
        else
        {
            /* num >= 2 here, because N % 1 is always 0 */
            per_part = N / (num - 1);
            last_part = N % (num - 1);
        }

        for (int i = 0; i < num; i++)
        {
            char part_path[256];
            snprintf(part_path, sizeof part_path,
                     "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwang%d.txt",
                     i + 1);

            int count = (i == num - 1) ? last_part : per_part;
            /* Read one line, write one line, `count` times. */
            if (space_copy_lines(pf, part_path, count) != 0)
            {
                break;
            }
        }

        fclose(pf);
    }
    
    /*
     * Concatenate the part files dangdangwang1.txt .. dangdangwang<n>.txt
     * from the data directory, in order, into a new file at `newpath`.
     *
     * The output file is created (truncated) even when `n` is not positive.
     * Does nothing if the output cannot be opened. If a part file cannot be
     * opened, an error is reported on stderr and merging stops, leaving the
     * parts merged so far in the output.
     */
    void merge(char *newpath, int n)
    {
        FILE *pf = fopen(newpath, "w");
        if (pf == NULL)
        {
            return;
        }

        for (int i = 0; i < n; i++)
        {
            char part_path[256];
            snprintf(part_path, sizeof part_path,
                     "Z:\\I\\尹成清华终极版C语言视频源码文档20150131\\大数据相关数据\\dangdangwang%d.txt",
                     i + 1);

            FILE *pfr = fopen(part_path, "r");
            if (pfr == NULL)
            {
                perror(part_path);
                break;
            }

            /*
             * Read one line, write one line. Looping on the fgets result
             * (rather than feof) stops exactly when no more data is read.
             */
            char str[1024];
            while (fgets(str, sizeof str, pfr) != NULL)
            {
                fputs(str, pf);
            }
            fclose(pfr);
        }

        fclose(pf);
    }
    
    /*
     * Entry point: read the number of parts from standard input, split the
     * input file into that many parts, then merge the parts back into a
     * single file.
     *
     * Returns 0 on success, 1 if the input is not a positive integer.
     */
    int main(void)
    {
        /* getN(path) can be used to obtain the input file's line count. */
        int num = 0;
        if (scanf("%d", &num) != 1 || num <= 0)
        {
            fprintf(stderr, "expected a positive number of parts\n");
            return 1;
        }

        space(path, num);
        merge(newpath, num);

        system("pause");
        return 0;
    }
  • 相关阅读:
    闭包如何产生内存消耗及性能消耗
    纯色半透明
    CSS选择符权重
    浅谈模块化的JavaScript
    WEB前端知识体系
    侃侃meta标签
    【读书笔记】读《编写可维护的JavaScript》 编程风格(第一部分)
    谈谈防御性编程
    很小的一个函数执行时间调试器Timer
    谈谈防止重复点击提交
  • 原文地址:https://www.cnblogs.com/sjxbg/p/5867073.html
Copyright © 2011-2022 走看看