zoukankan      html  css  js  c++  java
  • poj_1743 后缀数组

    题目大意

        给定一串数字,长度为N。定义数字中的某个连续的子串为一个"theme",只要子串满足: 
    (1)长度 >= 5 
    (2)和该子串相同或者该子串的“变种串”在整串数字中出现次数大于1 
    (3)假设整串中有k个该子串及其“变种串”,那么其中至少有两个不相重叠 
        求满足要求的 "theme" 串的最长长度。

    题目分析

        (1)首先考虑将“变种”串和原子串相互比较的问题,对字符串中所有索引大于等于1的字符都用该字符减去前一个字符,这样得到串的差串之后,原theme和其“变种”就一样了,此时只需要求差串中的最长相同子串,且这些子串之间不重叠

        求最长相同子串,可以考虑使用后缀数组和height数组。显然,height越大,则两个子串的公共前缀越长,越有可能是最长相同子串。但是,题目对"theme"串的要求(3)至少两个不重叠,因此需要考虑height[i]在尽可能大的同时,保证SA[i]和SA[i-1]之间的差值要大于height[i]以保证不重叠

        (2)然后,试图求解是否存在长度为M的"theme"串。 
    容易看出,后缀Suffix(j)和Suffix(k)的最长公共前缀的长度为 height[rank[j]+1], height[rank[j]+2]...height[rank[k]]的最小值。i从1到N遍历,通过height[i]>=M将i分开,即将后缀分成若干组,每组中的后缀的公共前缀长度均大于等于M,且可以肯定组A中的某后缀t1和组B中的某后缀t2的公共前缀长度小于M。若存在这样的组,则可以确定找到了公共前缀大于等于M的子串,下一步需要确定这些子串不重叠。只需要在组内寻找 SA[i] 之间最大的差值,看是否大于子串的长度,若大于则可以确定不重叠。

        (3)最后,求解"theme"串长度M的最大值,用二分法对"theme"串的可能长度进行二分求解,长度范围为0到N。每次二分得到中值M,先判断能否找到长度为M的"theme"串,若不能,则减小M,否则增加M。直到找到长度M最大的"theme"串。

    实现(c++)

    #define _CRT_SECURE_NO_WARNINGS
    #include<stdio.h>
    #include<string.h>
    // Larger of two values. Both arguments and the whole expansion are
    // parenthesized so the macro composes safely inside larger expressions.
    // Note: each argument may be evaluated twice, so avoid side effects.
    #define MAX(a, b) ((a) > (b) ? (a) : (b))
    // Capacity (in elements) of every global working array below.
    #define MAX_ARRAY_SIZE 20005
    // Number of counting-sort buckets used for the first sorting pass in
    // GetSuffixArray; symbol values stored in gStr must be below this bound.
    #define LETTERS 10000
    
    // Number of symbols currently stored in gStr (including the trailing 0
    // terminator that GetStr and main place at the end).
    int gStrLen;
    // The sequence being indexed, one integer symbol per element.
    int gStr[MAX_ARRAY_SIZE];
    // Bucket counters for the counting sorts in GetSuffixArray.
    int gCount[MAX_ARRAY_SIZE];
    // gSuffixArray[r] is the start index of the suffix at sorted position r.
    int gSuffixArray[MAX_ARRAY_SIZE];
    // Rank buffer. GetSuffixArray uses it as one of two alternating rank
    // buffers; GetHeight then rewrites it so gRank[start] is the sorted
    // position of the suffix beginning at start.
    int gRank[MAX_ARRAY_SIZE];
    // Second alternating buffer of GetSuffixArray: suffix start indices
    // ordered by their second sort key, or the previous round's ranks.
    int gOrderBySecondKey[MAX_ARRAY_SIZE];
    // First sort key (current rank) of each entry of the second-key ordering.
    int gFirstKeyArray[MAX_ARRAY_SIZE];
    // gHeight[r] is the common-prefix length of the suffixes at sorted
    // positions r and r-1, as filled in by GetHeight; gHeight[0] is 0.
    int gHeight[MAX_ARRAY_SIZE];
    
    // Tells whether positions a and b carry the same key pair in arr, i.e.
    // arr[a] matches arr[b] and arr[a + step] matches arr[b + step].
    // The second pair is only read when the first pair already matches.
    bool Compare(int* arr, int a, int b, int step){
    	if (arr[a] != arr[b]){
    		return false;
    	}
    	return arr[a + step] == arr[b + step];
    }
    
    
    // Loads a NUL-terminated lowercase string into gStr as integer symbols
    // ('a' becomes 1, 'b' becomes 2, ...), appends a 0 terminator symbol and
    // stores the resulting symbol count (string length + 1) in gStrLen.
    // The whole gStr buffer is zeroed first.
    void GetStr(char* str){
    	memset(gStr, 0, sizeof(gStr));
    	const int len = strlen(str);
    	for (int pos = 0; pos != len; ++pos){
    		gStr[pos] = str[pos] - 'a' + 1;
    	}
    	// The terminator occupies one extra slot and counts towards the length.
    	gStr[len] = 0;
    	gStrLen = len + 1;
    }
    
    // Fills gSuffixArray with the start indices of the suffixes of
    // gStr[0 .. gStrLen-1] in sorted order, by repeatedly sorting on pairs of
    // ranks while doubling the compared length (step = 1, 2, 4, ...).
    // Requires every symbol in gStr to be in [0, LETTERS).
    // NOTE(review): Compare below reads rank[x + step], which can index past
    // gStrLen-1; this presumably relies on the caller having put a unique
    // smallest terminator at the end of gStr (main and GetStr store 0 there)
    // so the first comparison already fails in those cases - confirm.
    void GetSuffixArray(){
    	int n = gStrLen;
    	// Initial pass: counting sort of the suffixes by their first symbol.
    	memset(gCount, 0, sizeof(gCount));
    	for (int i = 0; i < n; i++){
    		gRank[i] = gStr[i];
    		gCount[gRank[i]] ++;
    	}
    	for (int i = 1; i < LETTERS; i++){
    		gCount[i] += gCount[i - 1];
    	}
    	// Walking backwards keeps the counting sort stable.
    	for (int i = n - 1; i >= 0; i--){
    		gSuffixArray[--gCount[gRank[i]]] = i;
    	}
    	int step = 1;
    	// rank and order_by_second_key swap roles every round.
    	int* rank = gRank, *order_by_second_key = gOrderBySecondKey;
    	// m is the number of buckets needed for the current rank values.
    	int m = LETTERS;
    	while (step < n){
    		int p = 0;
    		// Suffixes starting in the last `step` positions have no second
    		// key, so they are placed first in the second-key ordering.
    		for (int i = n - step; i < n; i++){
    			order_by_second_key[p++] = i;
    		}
    		// The remaining suffixes follow in the order of their second key,
    		// which is the current sorted order of the suffix `step` later.
    		for (int i = 0; i < n; i++){
    			if (gSuffixArray[i] >= step){
    				order_by_second_key[p++] = gSuffixArray[i] - step;
    			}
    		}
    		// Stable counting sort of that ordering by first key (current rank).
    		for (int i = 0; i < n; i++){
    			gFirstKeyArray[i] = rank[order_by_second_key[i]];
    		}
    		for (int i = 0; i < m; i++){
    			gCount[i] = 0;
    		}
    		for (int i = 0; i < n; i++){
    			gCount[gFirstKeyArray[i]] ++;
    		}
    		for (int i = 1; i < m; i++){
    			gCount[i] += gCount[i - 1];
    		}
    		for (int i = n - 1; i >= 0; i--){
    			gSuffixArray[--gCount[gFirstKeyArray[i]]] = order_by_second_key[i];
    		}
    		// Swap buffers: order_by_second_key now holds the previous ranks and
    		// rank receives the new ones.
    		int* tmp = rank;
    		rank = order_by_second_key;
    		order_by_second_key = tmp;
    		rank[gSuffixArray[0]] = 0;
    		p = 0;
    		// Neighbouring suffixes with equal (first, second) previous ranks
    		// share a new rank; otherwise the rank increases by one.
    		for (int i = 1; i < n; i++){
    			if (Compare(order_by_second_key, gSuffixArray[i], gSuffixArray[i - 1], step)){
    				rank[gSuffixArray[i]] = p;
    			}
    			else{
    				rank[gSuffixArray[i]] = ++p;
    			}
    		}
    		// p + 1 distinct ranks exist, so that many buckets suffice next round.
    		m = p + 1;
    		step *= 2;
    	}
    }
    
    // Fills gHeight from gStr and gSuffixArray: gHeight[r] becomes the length
    // of the common prefix of the suffixes at sorted positions r and r-1.
    // Also rewrites gRank so that gRank[start] is the sorted position of the
    // suffix beginning at start (for sorted positions 1 .. gStrLen-1).
    // The last start index (gStrLen-1) is not visited by the main loop.
    void GetHeight(){
    	const int len = gStrLen;
    	for (int r = 1; r < len; r++){
    		gRank[gSuffixArray[r]] = r;
    	}
    	gHeight[0] = 0;
    	// Carried over between start positions: the match length can shrink by
    	// at most one when moving to the next start index.
    	int lcp = 0;
    	for (int pos = 0; pos + 1 < len; pos++){
    		const int prev = gSuffixArray[gRank[pos] - 1];
    		if (lcp > 0){
    			lcp--;
    		}
    		for (;;){
    			if (pos + lcp >= len || prev + lcp >= len){
    				break;
    			}
    			if (gStr[pos + lcp] != gStr[prev + lcp]){
    				break;
    			}
    			lcp++;
    		}
    		gHeight[gRank[pos]] = lcp;
    	}
    }
    // Scans gSuffixArray in runs of consecutive entries whose gHeight values
    // are all at least k - 1, and reports whether some run contains two start
    // positions that lie at least k apart.
    // Returns true as soon as such a run is found, false otherwise.
    bool Find(int k){
    	const int need = k - 1;
    	int idx = 1;
    	while (idx < gStrLen){
    		// Each run is seeded with the entry just before idx.
    		int lo = gSuffixArray[idx - 1];
    		int hi = lo;
    		for (; idx < gStrLen && gHeight[idx] >= need; idx++){
    			const int pos = gSuffixArray[idx];
    			if (pos < lo){
    				lo = pos;
    			}
    			if (pos > hi){
    				hi = pos;
    			}
    		}
    		if (hi - lo >= k){
    			return true;
    		}
    		// Step over the entry that ended the run; it seeds the next one.
    		idx++;
    	}
    	return false;
    }
    void printstr(int n){
    	printf("string = 
    ");
    	for (int i = 0; i < n; i++){
    		printf("%d ", gStr[i]);
    	}
    	printf("
    ");
    }
    void printsuffix(int n){
    	printf("suffix = 
    ");
    	for (int i = 0; i < n; i++){
    		printf("%d ", gSuffixArray[i]);
    	}
    	printf("
    ");
    }
    void printheigt(int n){
    	printf("height = 
    ");
    	for (int i = 0; i < n; i++){
    		printf("%d ", gHeight[i]);
    	}
    	printf("
    ");
    }
    int main(){
    	int n;
    	while (true){
    		scanf("%d", &n);
    		
    		if (n == 0){
    			break;
    		}
    
    		for (int i = 0; i < n; i++){
    			scanf("%d", &gStr[i]);
    		}
    		int min = 100;
    		for (int i = 1; i < n; i++){
    			gStr[i - 1] = gStr[i] - gStr[i - 1];
    			min = gStr[i - 1] < min ? gStr[i - 1] : min;
    		}
    		min--;
    		for (int i = 0; i < n; i++){
    			gStr[i] -= min;
    		}
    		gStr[n-1] = 0;
    		gStrLen = n;
    
    		GetSuffixArray();
    		GetHeight();
    //		printstr(n);
    //		printsuffix(n);
    //		printheigt(n);
    		int beg = 0, end = n, mid, max;
    		bool flag = true;
    		while (beg < end){
    			mid = (beg + end) / 2;
    			if (Find(mid)){
    				beg = mid + 1;
    				max = mid;
    			}
    			else{
    				if (mid <= 5){
    					flag = false;
    					break;
    				}
    				end = mid;
    			}
    		}
    		if (!flag){
    			printf("0
    ");
    		}
    		else{
    			printf("%d
    ", max);
    		}
    	}
    	return 0;
    }
    
  • 相关阅读:
    洛谷P1199三国游戏
    Cracking the Coding Interview 6.2
    Cracking the Coding Interview 5.2
    Cracking the Coding Interview 5.7
    洗牌算法
    字符串排列组合问题
    指针作为形参
    KMP算法代码
    搜索二叉树
    面试题集锦
  • 原文地址:https://www.cnblogs.com/gtarcoder/p/4835535.html
Copyright © 2011-2022 走看看