Wednesday, 20 June 2012

Interviewstreet String Similarity Solution C++


Interviewstreet String Similarity Challenge



Problem Statement

String similarity of two strings is defined as length of longest prefix common to both strings. For example string similarity for abcd and abb is 2, length of ab. Calculate sum of similarities of a string with each of its suffixes.
Input: First line contains T, no of test cases and next T lines contain strings
Output: T lines contain answer.
Sample Input:                                                  Sample Output:
1                                                                       3
aa

Algorithm

1) calculate similarity value of string with each of its suffix, i.e if character at index i matches with 1st character calculate the similarity value for this suffix.
2) For calculating similarity value, start counter with 0 and keep counting until prefix of the given suffix matches with original string. if doesn't match break and return count.
3) Keep calculating sum by adding every count value returned.

If you have a different approach in mind, please share it in comments section.

Solution

#include<stdio.h>
#include<string.h>

int getSimilarity(char str[],int sub_ind,int st);

int main()
{
    int T,len=0,sum=0,i=0;
    char s[100001];
    scanf("%d",&T);
    while(T--)
    {
        sum=0;
        scanf("%s",s);
        int y=strlen(s);
        for(i=0;i<y;i++)
            if(s[i]==s[0])
            {
               
                sum=sum+getSimilarity(s,i,y);
            }
        printf("%d\n",sum);
    }
}

int getSimilarity(char str[],int sub_ind,int st)
{
    int g=sub_ind;
    int j, i=0;
    int count=0;
    for(i=g,j=0;i<st;i++)
        if(str[i]==str[j++])
        {
            count++;
        }
        else
            break;
    return count;  
}

Time Complexity
O(n*n), bcoz if first character of suffix matches with first character of original string then we calculate string similarity for this suffix with original string which takes O(n) time and there can be n such suffix matches.



7 comments:

  1. hi could you tell me why my program fails to run the last six test cases saying

    "terminate called after throwing an instance of 'std::bad_alloc'
    what(): std::bad_alloc
    Aborted (core dumped)"

    here's the program.
    //program start
    #include
    #include

    using namespace std;

    void process(string *);
    int similarity(string ,int );

    int N=0,similar=0;
    string *words=NULL;

    int main()
    {
    cin>>N;
    words=new string[N];
    for(int i=0;i>words[i];
    }
    process(words);
    return 0;
    }

    void process(string * words)
    {
    string temp;
    for(int i=0;i<N;i++)
    {
    temp=words[i];
    similar=similarity(temp,0);
    cout<<similar<<endl;
    }
    }

    //recurssion starts
    int similarity(string x,int firstSuffix)
    {
    int counter=0;
    int size=x.length();
    if(firstSuffix==size)
    {
    return counter;
    }
    else
    {
    for(int i=0,j=firstSuffix;i<size;i++,j++)
    {
    if(x[i]==x[j]&&j<size)
    {
    counter++;
    }
    else
    {
    break;
    }
    }
    counter+=similarity(x,firstSuffix+1);
    }
    return counter;
    }

    ReplyDelete
  2. This comment has been removed by the author.

    ReplyDelete
  3. Hi Sachin,
    Does your solution work??? I have a similiar N^2 solution and it fails 2 test cases saying time limit exceeeded.

    My Solution:

    #include
    #include
    using namespace std;
    int main()
    {
    int count;
    string s;
    cin>>count;

    while(count>0)
    {

    cin>>s;
    int length,k;
    int sum=length=s.length();
    for(int i=1;i<length;i++)
    {

    for(k=0;k<length-i && s[k]==s[k+i];k++);


    sum+=k;
    }
    cout<<sum<<endl;
    count--;
    }
    return 0;
    }

    ReplyDelete
    Replies
    1. Yes, the above solution passes all test cases.

      Delete
  4. My code is passing only the first test case..please will u find the reason?
    #include
    #define TC 10
    using namespace std;
    int Similarity(string s);
    int main(){
    int t,i=0,j=0;
    string s[TC];
    cin>>t;
    while(t--){
    cin>>s[i];
    cout<<Similarity(s[i++])<<endl;
    }
    return 0;
    }
    int Similarity(string s){
    int i=0,cnt=0,sum=0;
    int n=s.length();
    string s1;
    while(i<n){
    s1=s.substr(i,n-i);
    if(s.compare(s1)==0)
    sum+=s.length();
    else if(s.compare(s1)<0)
    sum+=0;
    else{
    string::iterator it,it1;
    for(it=s.begin(),it1=s1.begin();it<s.end()&&it1<s1.end();it++,it1++)
    if(*it1==*it)
    sum++;
    else
    break;

    }
    i++;
    }
    return sum;

    }

    ReplyDelete
  5. Wouldn't creatin suffix trees be faster??

    ReplyDelete