Python代码:
#encoding:utf-8
"""Count newline characters in C/C++ source files under a directory tree.

Walks ``root`` recursively, tallies every regular file it meets, and sums the
newline count of the files whose extension is listed in ``supportedTypes``.
The results are accumulated in the module-level counters below.
"""

import os
import time
import traceback

totalFiles = 0    # every regular file visited
totalStatics = 0  # files whose extension is in supportedTypes
totalError = 0    # files that could not be read
totalCount = 0    # newline characters summed over the counted files
supportedTypes = (".c", ".cpp", ".h", ".cc", ".cxx", ".hpp")

root = r"D:\Codes\cmac_1225\pub"

# Read size used by CountOneFile; keeps memory flat for very large files.
_CHUNK_SIZE = 1 << 20


def CountOneFile(filePath):
    """Return the number of newline characters in the file at ``filePath``.

    The file is read as raw bytes so that no decoding is attempted and
    ``\\r\\n`` and ``\\n`` line endings both count once per line.

    If the file cannot be opened or read, the traceback is printed,
    ``totalError`` is incremented and 0 is returned.
    """
    global totalError
    fileCount = 0
    try:
        with open(filePath, "rb") as fin:
            for chunk in iter(lambda: fin.read(_CHUNK_SIZE), b""):
                fileCount += chunk.count(b"\n")
    except (IOError, OSError):
        totalError += 1
        traceback.print_exc()
        return 0
    return fileCount


def CountFiles(rootPath):
    """Recursively accumulate statistics for ``rootPath``.

    A regular file increments ``totalFiles``; if its lower-cased extension is
    in ``supportedTypes`` it also increments ``totalStatics`` and adds its
    newline count to ``totalCount``.  A directory is descended into.  Anything
    else (for example a path that does not exist) is reported on stdout.
    """
    global totalCount, totalFiles, totalStatics
    if os.path.isfile(rootPath):
        totalFiles += 1
        fileExten = os.path.splitext(rootPath)[1].lower()
        if fileExten in supportedTypes:
            totalStatics += 1
            totalCount += CountOneFile(rootPath)
    elif os.path.isdir(rootPath):
        for childPath in os.listdir(rootPath):
            CountFiles(os.path.join(rootPath, childPath))
    else:
        print("[error] %s" % rootPath)


def main():
    """Scan ``root`` and print the file, line and elapsed-time totals."""
    # time.clock no longer exists on current Python; fall back to it only
    # on interpreters that do not provide perf_counter.
    timer = getattr(time, "perf_counter", None) or time.clock
    start = timer()
    CountFiles(root)
    end = timer()
    print("Files: %d, Used: %d" % (totalFiles, totalStatics))
    print("Lines: %d, Time: %d" % (totalCount, end - start))


if __name__ == "__main__":
    main()
C语言代码:
/* SourceCount.h
 *
 * Shared limits, helper macros and the list of recognised source-file
 * extensions for the SourceCount line counter.
 */
#ifndef SOURCE_COUNT_H
#define SOURCE_COUNT_H

#include <string.h> /* strlen, used by RM_LINE_BREAK */

#ifndef NULL
#define NULL 0
#endif

#define MAX_CMD_LEN 2048      /* size of the shell command buffer */
#define MAX_PATH_LEN 256      /* size of a path / extension buffer */
#define MAX_THREAD_NUM 20     /* upper bound on worker threads */
#define DEFAULT_THREAD_NUM 2  /* used when the requested count is out of range */
#define EXTEN_NUM 6           /* number of entries in extensions[] */
#define READ_BUFF_SIZE 1024   /* bytes read from a file per fread call */

/* Return `ret` from the enclosing function when `var` is NULL.
 * Wrapped in do/while(0) so it behaves as one statement after if/else. */
#define CHECK_NULL_RET(var, ret) do { if (NULL == (var)) return ret; } while (0)

/* Yield threadNum when it lies in 1..MAX_THREAD_NUM, else DEFAULT_THREAD_NUM. */
#define PROC_THREAD_NUM(threadNum) \
    (((threadNum) > 0 && (threadNum) <= MAX_THREAD_NUM) ? (threadNum) : DEFAULT_THREAD_NUM)

/* Replace a trailing '\n' in the NUL-terminated buffer `path` with '\0'. */
#define RM_LINE_BREAK(path) \
    do { \
        size_t rmLineBreakLen_ = strlen(path); \
        if (rmLineBreakLen_ > 0 && (path)[rmLineBreakLen_ - 1] == '\n') \
            (path)[rmLineBreakLen_ - 1] = '\0'; \
    } while (0)

/* Lower-case `str` in place (`_strlwr` is the non-deprecated CRT spelling). */
#define STR_LOWER(str) _strlwr(str)

/* Extensions (lower case, with leading dot) of the files that are counted.
 * `static` keeps one private copy per translation unit, so the header can be
 * included from several source files without duplicate-symbol errors. */
static const char extensions[EXTEN_NUM][MAX_PATH_LEN] = {".c", ".cc", ".cxx", ".cpp", ".h", ".hpp"};

#endif /* SOURCE_COUNT_H */
1 /* SourceCount.cpp */ 2 #include <cstdlib> 3 #include <Windows.h> 4 #include <stdio.h> 5 #include <time.h> 6 #include "SourceCount.h" 7 8 HANDLE hMutex4File; 9 HANDLE hMutex4Stat; 10 11 FILE *pipe = NULL; 12 int totalCount = 0; 13 14 int CountFiles(const char *root, const int threadNum); 15 DWORD WINAPI ThreadProc(LPVOID lpParameter); 16 int CountOneFile(const char *filePath); 17 int CheckFileExten(const char *exten); 18 19 int main() 20 { 21 char testRoot[] = "D:\Codes\cmac_1225"; 22 char testThreadNum = 12; 23 24 clock_t start = clock(); 25 CountFiles(testRoot, testThreadNum); 26 printf("Total: %d, time: %d ", totalCount, clock()-start); 27 system("pause"); 28 return 0; 29 } 30 31 int CountFiles(const char *root, const int threadNum) 32 { 33 HANDLE pHandles[MAX_THREAD_NUM]; 34 char command[MAX_CMD_LEN]; 35 int handleNum; 36 int loop; 37 int ret; 38 39 CHECK_NULL_RET(root, 0); 40 41 sprintf(command, "dir "%s" /s /b", root); 42 pipe = _popen(command, "rt"); // 执行命令,生成管道 43 CHECK_NULL_RET(pipe, 0); 44 45 // 创建互斥量 46 hMutex4File = CreateMutex(NULL, false, NULL); 47 hMutex4Stat = CreateMutex(NULL, false, NULL); 48 49 handleNum = PROC_THREAD_NUM(threadNum); // 确定Thread数 50 for(loop = 0; loop < handleNum; loop++) 51 { 52 pHandles[loop] = CreateThread(NULL, 0, ThreadProc, NULL, 0, NULL); // 创建线程 53 } 54 WaitForMultipleObjects(handleNum, pHandles, true, INFINITE); // 等待所有线程执行完毕 55 for(loop = 0; loop < handleNum; loop++) 56 { 57 CloseHandle(pHandles[loop]); // 释放HANDLE资源 58 } 59 _pclose(pipe); // 关闭管道 60 return totalCount; 61 } 62 63 /// 进程函数 64 DWORD WINAPI ThreadProc(LPVOID lpParameter) 65 { 66 char filePath[MAX_PATH_LEN]; 67 char fileExten[MAX_PATH_LEN]; 68 int ret; 69 int lines; 70 71 while(true) 72 { 73 WaitForSingleObject(hMutex4File, INFINITE); // 获取pipe互斥权限 74 if(feof(pipe)) 75 { 76 ReleaseMutex(hMutex4File); 77 return 0; 78 } 79 fgets(filePath, MAX_PATH_LEN, pipe); 80 ReleaseMutex(hMutex4File); 81 82 RM_LINE_BREAK(filePath); 83 _splitpath(filePath, NULL, NULL, 
NULL, fileExten); 84 STR_LOWER(fileExten); 85 ret = CheckFileExten(fileExten); 86 if(ret == 1) 87 { 88 lines = CountOneFile(filePath); 89 WaitForSingleObject(hMutex4Stat, INFINITE); // 获取stat互斥权限 90 totalCount += lines; 91 ReleaseMutex(hMutex4Stat); 92 } 93 } 94 return 0; 95 } 96 97 int CountOneFile(const char *filePath) 98 { 99 FILE *fp = fopen(filePath, "rt"); 100 int lineCount = 0; 101 int readBytes; 102 int loop; 103 unsigned char buff[READ_BUFF_SIZE]; 104 105 CHECK_NULL_RET(fp, 0); 106 while(!feof(fp)) 107 { 108 readBytes = fread(buff, 1, READ_BUFF_SIZE, fp); 109 for(loop=0; loop<readBytes; loop++) 110 { 111 if(buff[loop] == 0x0a) 112 { 113 lineCount++; 114 } 115 } 116 } 117 fclose(fp); 118 return lineCount; 119 } 120 121 int CheckFileExten(const char *exten) 122 { 123 int loop; 124 for(loop=0; loop<EXTEN_NUM; loop++) 125 { 126 if(strcmp(exten, extensions[loop]) == 0) 127 { 128 return 1; 129 } 130 } 131 return 0; 132 }
测试:
文件总数:21984
大小:991MB
满足条件文件总数:20274
代码总行数:6575561
Python用时:949s
C语言:
4 线程,读取缓冲区 1024,用时80s
6 线程,读取缓冲区 1024,用时66.7s
8 线程,读取缓冲区 1024,用时53.5s
10 线程,读取缓冲区 1024,用时41.8s
10 线程,读取缓冲区 2048,用时16s
10 线程,读取缓冲区 4096,用时16s
12 线程,读取缓冲区 2048,用时14.9s
最佳:12线程,缓冲区2048
参数处理:
/* Print the command-line usage text to stdout, then wait for a key press
 * via the shell's `pause` command. */
void PrintHelpInfo()
{
    /* fputs rather than printf: the text is fixed and must never be
     * interpreted as a format string. */
    fputs(
        "代码行统计工具\n"
        "SourceCount [drive:][path][filename] [ ...] [/E extension [ ...]] [/T threadnum]"
        " [/S buffsize] [/P [filename]]\n"
        "\n"
        " [drive:][path][filename]\n"
        " 指定要统计的路径和/或文件。\n"
        "\n"
        " /E extension 指定文件类型。\n"
        " /T threadnum 指定使用的线程数量。\n"
        " /S buffsize 指定文件读取缓冲区的大小。\n"
        " /P [filename] 输出文件统计结果[到文件]。\n"
        "\n",
        stdout);
    system("pause");
}