Skip to content

Commit ca47f2c

Browse files
authored
Improvement/stricter serp api enrichment filter (#2670)
1 parent 1d194bd commit ca47f2c

File tree

15 files changed

+211
-45
lines changed

15 files changed

+211
-45
lines changed

services/apps/premium/members_enrichment_worker/src/activities.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import {
22
findMemberEnrichmentCache,
3+
findMemberIdentityWithTheMostActivityInPlatform,
34
getEnrichmentData,
45
insertMemberEnrichmentCache,
56
isCacheObsolete,
@@ -50,4 +51,5 @@ export {
5051
touchMemberEnrichmentCacheUpdatedAt,
5152
updateMemberEnrichmentCache,
5253
isEnrichableBySource,
54+
findMemberIdentityWithTheMostActivityInPlatform,
5355
}

services/apps/premium/members_enrichment_worker/src/activities/enrichment.ts

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
import { findMemberIdentityWithTheMostActivityInPlatform as findMemberIdentityWithTheMostActivityInPlatformQuestDb } from '@crowd/data-access-layer/src/activities'
12
import {
23
findMemberEnrichmentCacheDb,
34
insertMemberEnrichmentCacheDb,
45
touchMemberEnrichmentCacheUpdatedAtDb,
56
updateMemberEnrichmentCacheDb,
67
} from '@crowd/data-access-layer/src/old/apps/premium/members_enrichment_worker'
7-
import { IMemberEnrichmentCache, MemberEnrichmentSource } from '@crowd/types'
8+
import { RedisCache } from '@crowd/redis'
9+
import {
10+
IEnrichableMemberIdentityActivityAggregate,
11+
IMemberEnrichmentCache,
12+
MemberEnrichmentSource,
13+
} from '@crowd/types'
814

915
import { EnrichmentSourceServiceFactory } from '../factory'
1016
import { svc } from '../main'
@@ -27,7 +33,7 @@ export async function getEnrichmentData(
2733
input: IEnrichmentSourceInput,
2834
): Promise<IMemberEnrichmentData | null> {
2935
const service = EnrichmentSourceServiceFactory.getEnrichmentSourceService(source, svc.log)
30-
if (service.isEnrichableBySource(input)) {
36+
if (service.isEnrichableBySource(input) && (await hasRemainingCredits(source))) {
3137
return service.getData(input)
3238
}
3339
return null
@@ -52,6 +58,41 @@ export async function isCacheObsolete(
5258
)
5359
}
5460

61+
export async function setHasRemainingCredits(
62+
source: MemberEnrichmentSource,
63+
hasCredits: boolean,
64+
): Promise<void> {
65+
const redisCache = new RedisCache(`enrichment-${source}`, svc.redis, svc.log)
66+
if (hasCredits) {
67+
await redisCache.set('hasRemainingCredits', 'true', 60)
68+
} else {
69+
await redisCache.set('hasRemainingCredits', 'false', 60)
70+
}
71+
}
72+
73+
export async function getHasRemainingCredits(source: MemberEnrichmentSource): Promise<boolean> {
74+
const redisCache = new RedisCache(`enrichment-${source}`, svc.redis, svc.log)
75+
return (await redisCache.get('hasRemainingCredits')) === 'true'
76+
}
77+
78+
export async function hasRemainingCreditsExists(source: MemberEnrichmentSource): Promise<boolean> {
79+
const redisCache = new RedisCache(`enrichment-${source}`, svc.redis, svc.log)
80+
return await redisCache.exists('hasRemainingCredits')
81+
}
82+
83+
export async function hasRemainingCredits(source: MemberEnrichmentSource): Promise<boolean> {
84+
const service = EnrichmentSourceServiceFactory.getEnrichmentSourceService(source, svc.log)
85+
86+
if (await hasRemainingCreditsExists(source)) {
87+
return getHasRemainingCredits(source)
88+
}
89+
90+
const hasCredits = await service.hasRemainingCredits()
91+
92+
await setHasRemainingCredits(source, hasCredits)
93+
return hasCredits
94+
}
95+
5596
export async function findMemberEnrichmentCache(
5697
source: MemberEnrichmentSource,
5798
memberId: string,
@@ -81,3 +122,10 @@ export async function touchMemberEnrichmentCacheUpdatedAt(
81122
): Promise<void> {
82123
await touchMemberEnrichmentCacheUpdatedAtDb(svc.postgres.writer.connection(), memberId, source)
83124
}
125+
126+
export async function findMemberIdentityWithTheMostActivityInPlatform(
127+
memberId: string,
128+
platform: string,
129+
): Promise<IEnrichableMemberIdentityActivityAggregate> {
130+
return findMemberIdentityWithTheMostActivityInPlatformQuestDb(svc.questdbSQL, memberId, platform)
131+
}

services/apps/premium/members_enrichment_worker/src/activities/getMembers.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import { svc } from '../main'
1111
export async function getEnrichableMembers(
1212
limit: number,
1313
sources: MemberEnrichmentSource[],
14-
afterId: string,
14+
afterCursor: { activityCount: number; memberId: string } | null,
1515
): Promise<IEnrichableMember[]> {
1616
let rows: IEnrichableMember[] = []
1717
const sourceInputs: IMemberEnrichmentSourceQueryInput[] = sources.map((s) => {
@@ -23,7 +23,7 @@ export async function getEnrichableMembers(
2323
}
2424
})
2525
const db = svc.postgres.reader
26-
rows = await fetchMembersForEnrichment(db, limit, sourceInputs, afterId)
26+
rows = await fetchMembersForEnrichment(db, limit, sourceInputs, afterCursor)
2727

2828
return rows
2929
}

services/apps/premium/members_enrichment_worker/src/schedules/getMembersToEnrich.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ export const scheduleMembersEnrichment = async () => {
2727
},
2828
args: [
2929
{
30-
afterId: null,
30+
afterCursor: null,
3131
},
3232
],
3333
},

services/apps/premium/members_enrichment_worker/src/sources/clearbit/service.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ import {
2727
export default class EnrichmentServiceClearbit extends LoggerBase implements IEnrichmentService {
2828
public source: MemberEnrichmentSource = MemberEnrichmentSource.CLEARBIT
2929
public platform = `enrichment-${this.source}`
30-
public enrichableBySql = `mi.type = 'email' and mi.verified`
30+
public enrichMembersWithActivityMoreThan = 10
31+
32+
public enrichableBySql = `"activitySummary".total_count > ${this.enrichMembersWithActivityMoreThan} AND mi.type = 'email' and mi.verified`
3133

3234
// bust cache after 120 days
3335
public cacheObsoleteAfterSeconds = 60 * 60 * 24 * 120
@@ -63,6 +65,10 @@ export default class EnrichmentServiceClearbit extends LoggerBase implements IEn
6365
return enriched
6466
}
6567

68+
async hasRemainingCredits(): Promise<boolean> {
69+
return true
70+
}
71+
6672
private async getDataUsingEmail(email: string): Promise<IMemberEnrichmentDataClearbit> {
6773
let response: IMemberEnrichmentClearbitAPIResponse
6874

services/apps/premium/members_enrichment_worker/src/sources/progai/service.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ export default class EnrichmentServiceProgAI extends LoggerBase implements IEnri
124124
return enrichableUsingGithubHandle || enrichableUsingEmail
125125
}
126126

127+
async hasRemainingCredits(): Promise<boolean> {
128+
return true
129+
}
130+
127131
async getData(input: IEnrichmentSourceInput): Promise<IMemberEnrichmentDataProgAI> {
128132
let enriched: IMemberEnrichmentDataProgAI = null
129133

services/apps/premium/members_enrichment_worker/src/sources/serp/service.ts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,16 @@ import {
99
IMemberEnrichmentDataNormalized,
1010
} from '../../types'
1111

12-
import { IMemberEnrichmentDataSerp, IMemberEnrichmentSerpApiResponse } from './types'
12+
import {
13+
IMemberEnrichmentDataSerp,
14+
IMemberEnrichmentSerpApiResponse,
15+
ISerpApiAccountUsageData,
16+
} from './types'
1317

1418
export default class EnrichmentServiceSerpApi extends LoggerBase implements IEnrichmentService {
1519
public source: MemberEnrichmentSource = MemberEnrichmentSource.SERP
1620
public platform = `enrichment-${this.source}`
17-
public enrichMembersWithActivityMoreThan = 10
21+
public enrichMembersWithActivityMoreThan = 500
1822

1923
public enrichableBySql = `
2024
("activitySummary".total_count > ${this.enrichMembersWithActivityMoreThan}) AND
@@ -45,6 +49,25 @@ export default class EnrichmentServiceSerpApi extends LoggerBase implements IEnr
4549
)
4650
}
4751

52+
async hasRemainingCredits(): Promise<boolean> {
53+
try {
54+
const config = {
55+
method: 'get',
56+
url: `https://serpapi.com/account`,
57+
params: {
58+
api_key: process.env['CROWD_ENRICHMENT_SERP_API_KEY'],
59+
},
60+
}
61+
62+
const response: ISerpApiAccountUsageData = (await axios(config)).data
63+
64+
return response.total_searches_left > 0
65+
} catch (error) {
66+
this.log.error('Error while checking serpapi account usage', error)
67+
return false
68+
}
69+
}
70+
4871
async getData(input: IEnrichmentSourceInput): Promise<IMemberEnrichmentDataSerp | null> {
4972
let enriched: IMemberEnrichmentDataSerp = null
5073

services/apps/premium/members_enrichment_worker/src/sources/serp/types.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,24 @@ export interface IMemberEnrichmentSerpApiResponse {
77
search_information: IMemberEnrichmentSerpApiResponseSearchInformation
88
}
99

10+
export interface ISerpApiAccountUsageData {
11+
account_id: string
12+
api_key: string
13+
account_email: string
14+
account_status: string
15+
plan_id: string
16+
plan_name: string
17+
plan_monthly_price: number
18+
searches_per_month: number
19+
plan_searches_left: number
20+
extra_credits: number
21+
total_searches_left: number
22+
this_month_usage: number
23+
this_hour_searches: number
24+
last_hour_searches: number
25+
account_rate_limit_per_hour: number
26+
}
27+
1028
export interface IMemberEnrichmentSerpApiResponseSearchInformation {
1129
query_displayed: string
1230
total_results: number

services/apps/premium/members_enrichment_worker/src/types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ export interface IEnrichmentService {
3737
// can the source enrich using this input
3838
isEnrichableBySource(input: IEnrichmentSourceInput): boolean
3939

40+
// does the source have credits to enrich members, if returned false the source will be skipped
41+
// response will be saved to redis for 60 seconds and will be used for subsequent calls
42+
hasRemainingCredits(): Promise<boolean>
43+
4044
// SQL filter to get enrichable members for a source
4145
// members table is available as "members" alias
4246
// memberIdentities table is available as "mi" alias
@@ -66,7 +70,7 @@ export interface IMemberEnrichmentDataNormalizedOrganization {
6670
}
6771

6872
export interface IGetMembersForEnrichmentArgs {
69-
afterId?: string
73+
afterCursor: { activityCount: number; memberId: string } | null
7074
}
7175

7276
export interface IMemberEnrichmentSocialData {

services/apps/premium/members_enrichment_worker/src/workflows/enrichMember.ts

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const {
1919
updateMemberEnrichmentCache,
2020
isCacheObsolete,
2121
normalizeEnrichmentData,
22+
findMemberIdentityWithTheMostActivityInPlatform,
2223
} = proxyActivities<typeof activities>({
2324
startToCloseTimeout: '20 seconds',
2425
retry: {
@@ -42,12 +43,6 @@ export async function enrichMember(
4243
// cache is obsolete when it's not found or cache.updatedAt is older than cacheObsoleteAfterSeconds
4344
if (await isCacheObsolete(source, cache)) {
4445
const enrichmentInput: IEnrichmentSourceInput = {
45-
github: input.identities.find(
46-
(i) =>
47-
i.verified &&
48-
i.platform === PlatformType.GITHUB &&
49-
i.type === MemberIdentityType.USERNAME,
50-
),
5146
email: input.identities.find((i) => i.verified && i.type === MemberIdentityType.EMAIL),
5247
linkedin: input.identities.find(
5348
(i) =>
@@ -61,6 +56,30 @@ export async function enrichMember(
6156
activityCount: input.activityCount || 0,
6257
}
6358

59+
// there can be multiple verified identities in github, we select the one with the most activities
60+
const verifiedGithubIdentities = input.identities.filter(
61+
(i) =>
62+
i.verified &&
63+
i.platform === PlatformType.GITHUB &&
64+
i.type === MemberIdentityType.USERNAME,
65+
)
66+
67+
if (verifiedGithubIdentities.length > 1) {
68+
const ghIdentityWithTheMostActivities =
69+
await findMemberIdentityWithTheMostActivityInPlatform(input.id, PlatformType.GITHUB)
70+
if (ghIdentityWithTheMostActivities) {
71+
enrichmentInput.github = input.identities.find(
72+
(i) =>
73+
i.verified &&
74+
i.platform === PlatformType.GITHUB &&
75+
i.type === MemberIdentityType.USERNAME &&
76+
i.value === ghIdentityWithTheMostActivities.username,
77+
)
78+
}
79+
} else {
80+
enrichmentInput.github = verifiedGithubIdentities?.[0] || undefined
81+
}
82+
6483
const data = await getEnrichmentData(source, enrichmentInput)
6584

6685
if (!cache) {

0 commit comments

Comments
 (0)