Problem Statement

Given two strings str1 and str2, find the length of their longest common substring.

A substring is a contiguous sequence of characters within a string.

Examples

Example 1:

Input: str1 = "abcde", str2 = "abfce"
Output: 2

Explanation: The longest common substring is "ab" of length 2.
Example 2:

Input: str1 = "abcdxyz", str2 = "xyzabcd"
Output: 4

Explanation: The longest common substring is "abcd" of length 4.

Different Approaches

1️⃣ Recursive Approach

Intuition:

  • Start from the last characters and check if they match.
  • When a match is found, continue reducing the indices (i and j) and increasing the count.
  • If a mismatch occurs, reset the count and explore other possibilities by excluding characters from either string.

Approach:

  • Recursive Function
    Let LCSHelper(i, j, count) represent the length of the longest common substring ending at positions i in str1 and j in str2, where count tracks the length of the current substring.
  • Base Cases
    • If either i < 0 or j < 0, return the current count.
  • Transition
    • If str1[i] == str2[j], increment the count and recursively call LCSHelper(i-1, j-1, count).
    • Otherwise:
      • Reset count to 0 and explore two possibilities:
        • Exclude the current character of str1: LCSHelper(i-1, j, 0)
        • Exclude the current character of str2: LCSHelper(i, j-1, 0)
  • Result
    The result is the maximum of all recursive calls.

Code:

#include <iostream>
#include <string>
#include <algorithm>
using namespace std;

int LCSHelper(const string& str1, const string& str2, int i, int j, int count) {
    // Base case: if we are out of bounds
    if (i < 0 || j < 0) {
        return count;
    }

    int currentCount = count;

    // Case 1: Characters match
    if (str1[i] == str2[j]) {
        currentCount = LCSHelper(str1, str2, i - 1, j - 1, count + 1);
    }

    // Case 2: Characters do not match
    int excludeStr1 = LCSHelper(str1, str2, i - 1, j, 0);
    int excludeStr2 = LCSHelper(str1, str2, i, j - 1, 0);

    // Return the maximum count found
    return max(currentCount, max(excludeStr1, excludeStr2));
}

int longestCommonSubstring(const string& str1, const string& str2) {
    return LCSHelper(str1, str2, str1.size() - 1, str2.size() - 1, 0);
}

int main() {
    string str1 = "ABABC";
    string str2 = "BABCAC";
    cout << "Length of Longest Common Substring: " << longestCommonSubstring(str1, str2) << endl;
    return 0;
}
class Solution {
  public:
    // Recursive function to find the Longest Common Substring
    int recursive(string& str1, string& str2, int index1, int index2, int count) {
        // Base case: If either string is fully traversed
        if (index1 >= str1.size() || index2 >= str2.size()) {
            return count;
        }

        // If characters match, increment count and continue
        if (str1[index1] == str2[index2]) {
            count = recursive(str1, str2, index1 + 1, index2 + 1, count + 1);
        }

        // Explore the possibilities by resetting the count to 0
        int skipStr1 = recursive(str1, str2, index1 + 1, index2, 0);
        int skipStr2 = recursive(str1, str2, index1, index2 + 1, 0);

        // Return the maximum of the current count and other possibilities
        return max({count, skipStr1, skipStr2});
    }

    // Public function to find the Longest Common Substring
    int longestCommonSubstr(string str1, string str2) {
        return recursive(str1, str2, 0, 0, 0);
    }
};
class Solution {
public:
    // Recursive function to find the Longest Common Substring
    int recursive(string& str1, string& str2, int index1, int index2, int count, int maxLength) {
        // If either string is fully traversed, return the maxLength found so far
        if (index1 >= str1.size() || index2 >= str2.size()) {
            return maxLength;
        }

        // If characters match, increment the count and continue
        if (str1[index1] == str2[index2]) {
            count = recursive(str1, str2, index1 + 1, index2 + 1, count + 1, max(count + 1, maxLength));
        } else {
            // If characters do not match, reset the count
            count = 0;
        }

        // Recursively try skipping one character from either string
        int skipStr1 = recursive(str1, str2, index1 + 1, index2, 0, maxLength);
        int skipStr2 = recursive(str1, str2, index1, index2 + 1, 0, maxLength);

        // Return the maximum length of the common substring found
        return max({count, skipStr1, skipStr2});
    }

    // Public function to find the Longest Common Substring
    int longestCommonSubstr(string str1, string str2) {
        // Initialize the recursive function with the starting indices of both strings and initial values
        return recursive(str1, str2, 0, 0, 0, 0);
    }
};

Complexity Analysis:

  • Time Complexity: O(2^(m+n)), where m and n are the lengths of str1 and str2.
    • This is due to overlapping subproblems, as the same pairs of indices are recomputed multiple times.
  • Space Complexity: O(m + n), because of the recursion stack.

2️⃣ Tabulation

We are given a string S, the simplest approach will be to generate all the subsequences and store them, then manually find out the longest common substring. This naive approach will give us the correct answer but to generate all the subsequences, we will require exponential ( 2n ) time. Therefore we will try some other approaches.

Using Dynamic Programming:

We can modify the approach we used in the article Longest Common Subsequence, in order to find the longest common substring. The main distinguishing factor between the two is the consecutiveness of the characters.

While finding the longest common subsequence, we were using two pointers (ind1 and ind2) to map the characters of the two strings. We will again have the same set of conditions for finding the longest common substring, with slight modifications to what we do when the condition becomes true.

We will try to form a solution in the bottom-up (tabulation) approach. We will set two nested loops with loop variables i and j.

Thinking in terms of consecutiveness of characters, we have two conditions:

  • Declare a dp array of size [n][m]: where n and m are the lengths of S1 and S2 respectively. As there are two changing parameters in the recursive solution, 'ind1' and 'ind2'. The maximum value 'ind1' can attain is (n) and 'ind2' can attain maximum value of (m). Therefore, we need 2D dp array.
  • If (S1[i-1] != S2[j-1]), the characters don’t match, therefore the consecutiveness of characters is broken. So we set the cell value (dp[i][j]) as 0.
  • If (S1[i-1] == S2[j-1]), then the characters match and we simply set its value to 1+dp[i-1][j-1]. We have done so because dp[i-1][j-1] gives us the longest common substring till the last cell character (current strings -{matching character}). As the current cell’s character is matching we are adding 1 to the consecutive chain.
  • Note: dp[n][m] will not give us the answer; rather the maximum value in the entire dp array will give us the length of the longest common substring. This is because there is no restriction that the longest common substring is present at the end of both the strings.

Code:

#include <bits/stdc++.h>
using namespace std;

class Solution {
public:
    /* Function to find the length of 
    the Longest Common Substring (LCS) */
    int longestCommonSubstr(string str1, string str2) {
        int n = str1.size();
        int m = str2.size();
    
        // Initialize a 2D DP table with dimensions
        vector<vector<int>> dp(n + 1, vector<int>(m + 1, 0));

        // Initialize the answer variable
        int ans = 0;

        for (int i = 1; i <= n; i++) {
            for (int j = 1; j <= m; j++) {
                // Characters match, increment substring length
                if (str1[i - 1] == str2[j - 1]) {
                    dp[i][j] = 1 + dp[i - 1][j - 1];
                    
                    /* Update the maximum substring 
                    length found so far */
                    ans = max(ans, dp[i][j]);
                } else {
                    /* Characters don't match, 
                    substring length becomes 0 */
                    dp[i][j] = 0;
                }
            }
        }
        // Return the length of Longest Common Substring
        return ans;
    }
};

int main() {
    string s1 = "abcjklp";
    string s2 = "acjkp";

    // Create an instance of Solution class
    Solution sol;
    
    // Print the result
    cout << "The Length of Longest Common Substring is " << sol.longestCommonSubstr(s1, s2) << endl;

    return 0;
}

Complexity Analysis:

  • Time Complexity: O(N*M), Where N is the length of string S1 and M is the length of the string S2. As there are two nested loops, which is running for N*M.
  • Space Complexity: O(N*M), We are using an external array of size ‘N*M’.

4️⃣ Space Optimization

If we observe the relation obtained in tabultaion approach, dp[i-1][j-1]. We are using just the previous row, dp[ind1-1][ ], in order to calculate the present row.

So we are not required to contain an entire array, we can simply have two rows 'prev' and 'cur' where prev corresponds to dp[ind-1] and cur to dp[ind].

After declaring 'prev' and 'cur', replace dp[ind-1] to prev and dp[ind] with cur in the tabulation code. Following the same logic of tabulation, we will calculate cur[ind] and store the maximum of all the in 'ans variable in each iteration and after the inner loop executes, we will set prev = cur, so that the cur row can serve as prev for the next index.

After end of the execution, return 'ans' as our answer, as it will hold the maximum length of common substring.

Code:

#include <bits/stdc++.h>
using namespace std;

class Solution {
public:
    /* Function to find the length of 
    the Longest Common Substring (LCS) */
    int longestCommonSubstr(string str1, string str2) {
        int n = str1.size();
        int m = str2.size();
    
        /* Initialize two vectors to store 
        previous and current row values*/
        vector<int> prev(m+1, 0);
        vector<int> cur(m+1, 0);  
        
        /* Initialize the answer variable to
        store the maximum LCS length found*/
        int ans = 0; 
    
        // Loop through both strings
        for(int i = 1; i <= n; i++){
            for(int j = 1; j <= m; j++){
                // Characters match, increment substring length
                if(str1[i-1] == str2[j-1]){
                    int val = 1 + prev[j-1]; 
                    cur[j] = val; 
                    
                    /* Update the maximum substring
                    length found so far*/
                    ans = max(ans, val); 
                }
                else{
                    /* Characters don't match,
                    substring length becomes 0*/
                    cur[j] = 0; 
                }
                
            }
            /* Update the previous row with 
            the values of the current row*/
            prev = cur; 
        }
        // Return the length of Longest Common Substring
        return ans;
    }
};

int main() {
    string s1 = "abcjklp";
    string s2 = "acjkp";

    // Create an instance of Solution class
    Solution sol;
    
    // Print the result
    cout << "The Length of Longest Common Substring is " << sol.longestCommonSubstr(s1, s2) << endl;

    return 0;
}

Complexity Analysis:

  • Time Complexity: O(N*M), Where N is the length of string S1 and M is the length of the string S2. As there are two nested loops, which is running for N*M.
  • Space Complexity: O(M), We are using an external array of size ‘M+1’ to store only two rows.