Interviewstreet String Similarity Challenge
Problem Statement
String similarity of two strings is defined as length of longest prefix common to both strings. For example string similarity for abcd and abb is 2, length of ab. Calculate sum of similarities of a string with each of its suffixes.Input: First line contains T, no of test cases and next T lines contain strings
Output: T lines contain answer.
Sample Input: Sample Output:
1 3
aa
Algorithm
1) calculate similarity value of string with each of its suffix, i.e if character at index i matches with 1st character calculate the similarity value for this suffix.2) For calculating similarity value, start counter with 0 and keep counting until prefix of the given suffix matches with original string. if doesn't match break and return count.
3) Keep calculating sum by adding every count value returned.
If you have a different approach in mind, please share it in comments section.
Solution
#include<stdio.h>
#include<string.h>
int getSimilarity(char str[],int sub_ind,int st);
int main()
{
int T,len=0,sum=0,i=0;
char s[100001];
scanf("%d",&T);
while(T--)
{
sum=0;
scanf("%s",s);
int y=strlen(s);
for(i=0;i<y;i++)
if(s[i]==s[0])
{
sum=sum+getSimilarity(s,i,y);
}
printf("%d\n",sum);
}
}
int getSimilarity(char str[],int sub_ind,int st)
{
int g=sub_ind;
int j, i=0;
int count=0;
for(i=g,j=0;i<st;i++)
if(str[i]==str[j++])
{
count++;
}
else
break;
return count;
}
Time Complexity#include<string.h>
int getSimilarity(char str[],int sub_ind,int st);
int main()
{
int T,len=0,sum=0,i=0;
char s[100001];
scanf("%d",&T);
while(T--)
{
sum=0;
scanf("%s",s);
int y=strlen(s);
for(i=0;i<y;i++)
if(s[i]==s[0])
{
sum=sum+getSimilarity(s,i,y);
}
printf("%d\n",sum);
}
}
int getSimilarity(char str[],int sub_ind,int st)
{
int g=sub_ind;
int j, i=0;
int count=0;
for(i=g,j=0;i<st;i++)
if(str[i]==str[j++])
{
count++;
}
else
break;
return count;
}
O(n*n), bcoz if first character of suffix matches with first character of original string then we calculate string similarity for this suffix with original string which takes O(n) time and there can be n such suffix matches.
hi could you tell me why my program fails to run the last six test cases saying
ReplyDelete"terminate called after throwing an instance of 'std::bad_alloc'
what(): std::bad_alloc
Aborted (core dumped)"
here's the program.
//program start
#include
#include
using namespace std;
void process(string *);
int similarity(string ,int );
int N=0,similar=0;
string *words=NULL;
int main()
{
cin>>N;
words=new string[N];
for(int i=0;i>words[i];
}
process(words);
return 0;
}
void process(string * words)
{
string temp;
for(int i=0;i<N;i++)
{
temp=words[i];
similar=similarity(temp,0);
cout<<similar<<endl;
}
}
//recurssion starts
int similarity(string x,int firstSuffix)
{
int counter=0;
int size=x.length();
if(firstSuffix==size)
{
return counter;
}
else
{
for(int i=0,j=firstSuffix;i<size;i++,j++)
{
if(x[i]==x[j]&&j<size)
{
counter++;
}
else
{
break;
}
}
counter+=similarity(x,firstSuffix+1);
}
return counter;
}
This comment has been removed by the author.
ReplyDeleteHi Sachin,
ReplyDeleteDoes your solution work??? I have a similiar N^2 solution and it fails 2 test cases saying time limit exceeeded.
My Solution:
#include
#include
using namespace std;
int main()
{
int count;
string s;
cin>>count;
while(count>0)
{
cin>>s;
int length,k;
int sum=length=s.length();
for(int i=1;i<length;i++)
{
for(k=0;k<length-i && s[k]==s[k+i];k++);
sum+=k;
}
cout<<sum<<endl;
count--;
}
return 0;
}
Yes, the above solution passes all test cases.
DeleteMy code is passing only the first test case..please will u find the reason?
ReplyDelete#include
#define TC 10
using namespace std;
int Similarity(string s);
int main(){
int t,i=0,j=0;
string s[TC];
cin>>t;
while(t--){
cin>>s[i];
cout<<Similarity(s[i++])<<endl;
}
return 0;
}
int Similarity(string s){
int i=0,cnt=0,sum=0;
int n=s.length();
string s1;
while(i<n){
s1=s.substr(i,n-i);
if(s.compare(s1)==0)
sum+=s.length();
else if(s.compare(s1)<0)
sum+=0;
else{
string::iterator it,it1;
for(it=s.begin(),it1=s1.begin();it<s.end()&&it1<s1.end();it++,it1++)
if(*it1==*it)
sum++;
else
break;
}
i++;
}
return sum;
}
Keep trying.
DeleteWouldn't creatin suffix trees be faster??
ReplyDelete