TLE in 1st test case. Passing 2nd and 3rd test case

#include <bits/stdc++.h>
using namespace std;

/* macros */
#define inf 1e9
#define int unsigned long long
#define fastio ios_base::sync_with_stdio(false);cin.tie(0);
#define vi vector
#define vc vector
#define vb vector
#define vt vector
#define pb push_back
#define fi first
#define se second
#define pi pair<int,int>
#define tc int t;cin>>t;while (t–){solve();}
#define gtc int t;cin>>t;int TC=1;while (t–) {cout << “Case #” << TC << ": " << solve() << “\n”;TC++;}
#define gvtc int t;cin>>t;int TC=1;while (t–) {cout << “Case #” << TC << ": "; vsolve(); TC++;}
#define notc solve();
#define newline cout << “\n”;
#define mod 1000000007
#define ret return 0;
#define sz(x) (int)x.size();
#define rep(i,a,b) for(int i=a;i<b;i++)
#define srt(x) sort(x.begin(), x.end())
#define rsrt(x) sort(x.rbegin(), x.rend())

string a,b;
string s;

int dp[51][2][20][20][20];

int go(int pos, bool tight, int cnt3, int cnt6, int cnt9) {

if (cnt3 >= 18 or cnt6 >= 18 or cnt9 >= 18) {
    return 0;
}

if (pos == s.size()) {
    if (cnt3 > 0 and cnt3 == cnt6 and cnt6 == cnt9) {
        return 1;
    }
    return 0;
}

if (dp[pos][tight][cnt3][cnt6][cnt9] != -1) {
    return dp[pos][tight][cnt3][cnt6][cnt9];
}

int ans = 0;
int en = tight ? (s[pos]-'0') : 9;

for (int i=0;i<=en;i++) {
    if (i == 3) {
        ans = (ans%mod + go(pos+1, tight&(i==en), cnt3+1, cnt6, cnt9)%mod)%mod;
    }
    else if (i == 6) {
        ans = (ans%mod + go(pos+1, tight&(i==en), cnt3, cnt6+1, cnt9)%mod)%mod;
    }
    else if (i == 9) {
        ans = (ans%mod + go(pos+1, tight&(i==en), cnt3, cnt6, cnt9+1)%mod)%mod;
    }else {
        ans = (ans%mod + go(pos+1, tight&(i==en), cnt3, cnt6, cnt9)%mod)%mod;
    }
    
}

return dp[pos][tight][cnt3][cnt6][cnt9] = ans;

}

void solve() {
cin>>a>>b;
reverse(a.begin(),a.end());
while (a.size() < b.size()) {
a += ‘0’;
}
reverse(a.begin(), a.end());

s = b;
memset (dp, -1, sizeof dp);
int ans = go(0,1,0,0,0);
s = a;
memset (dp, -1, sizeof dp);
ans = (ans - go(0, 1, 0, 0, 0) + mod)%mod;

vi freq(3,0);
for (char c : a) {
    if (c == '3') freq[0]++;
    if (c == '6') freq[1]++;
    if (c == '9') freq[2]++;
}
if (freq[0] > 0 && freq[0] == freq[1] && freq[1] == freq[2]) {
    ans += 1;
}
cout << ans << "\n";

}

signed main(){
fastio
tc
ret
}

My above code is working for 2nd and 3rd test cases, but it is giving tle for 1st test case. Can somebody tell me how do I optimize it further or is there any mistake I am making?