Skip to content

Commit

Permalink
Parse media links
Browse files Browse the repository at this point in the history
  • Loading branch information
GreenTeaCake committed Apr 16, 2024
1 parent 244ae13 commit eb96814
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 27 deletions.
40 changes: 24 additions & 16 deletions core/loader/atom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,35 @@ import type { TextResponse } from '../download.js'
import type { OriginPost } from '../post.js'
import { createPostsPage } from '../posts-page.js'
import type { Loader } from './index.js'
import { findAnchorHrefs, findLinksByType, toTime } from './utils.js'
import {
findAnchorHrefs,
findImageByAttr,
findLinksByType,
toTime
} from './utils.js'

/**
 * Builds the list of posts found in an Atom response.
 *
 * The response is parsed with `text.parse()` and every `entry` element that
 * has non-empty `id` text becomes one post object.
 *
 * @param text Downloaded response whose `parse()` result is queried for
 *   `entry` elements.
 * @returns One post per `entry` that has an `id`.
 */
function parsePosts(text: TextResponse): OriginPost[] {
let document = text.parse()
return [...document.querySelectorAll('entry')]
// Entries without `id` text are dropped here, which is what makes the
// non-null assertions on `id` in the mappings below safe.
.filter(entry => entry.querySelector('id')?.textContent)
// NOTE(review): this page is a rendered diff, so two mappings appear back to
// back. This first one always sets `media` to an empty list and looks like
// the removed version — confirm against the committed file.
.map(entry => ({
full: entry.querySelector('content')?.textContent ?? undefined,
intro: entry.querySelector('summary')?.textContent ?? undefined,
media: [],
originId: entry.querySelector('id')!.textContent!,
publishedAt: toTime(
entry.querySelector('published')?.textContent ??
entry.querySelector('updated')?.textContent
),
title: entry.querySelector('title')?.textContent ?? undefined,
url:
entry
.querySelector('link[rel=alternate], link:not([rel])')
?.getAttribute('href') ?? undefined
}))
// This mapping looks up `content` once and reuses it for both `full` and
// `media`.
.map(entry => {
const content = entry.querySelector('content')
return {
full: content?.textContent ?? undefined,
intro: entry.querySelector('summary')?.textContent ?? undefined,
// `src` values of the `img` elements matched inside `content`.
media: findImageByAttr('src', content?.querySelectorAll('img')),
originId: entry.querySelector('id')!.textContent!,
// `published` is preferred; `updated` is the fallback when it is absent.
publishedAt: toTime(
entry.querySelector('published')?.textContent ??
entry.querySelector('updated')?.textContent
),
title: entry.querySelector('title')?.textContent ?? undefined,
// First `link` that is `rel=alternate` or has no `rel` attribute at all.
url:
entry
.querySelector('link[rel=alternate], link:not([rel])')
?.getAttribute('href') ?? undefined
}
})
}

export const atom: Loader = {
Expand Down
41 changes: 30 additions & 11 deletions core/loader/rss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@ import type { TextResponse } from '../download.js'
import type { OriginPost } from '../post.js'
import { createPostsPage } from '../posts-page.js'
import type { Loader } from './index.js'
import { findAnchorHrefs, findLinksByType, toTime } from './utils.js'
import {
findAnchorHrefs,
findImageByAttr,
findLinksByType,
toTime
} from './utils.js'

// Namespace URI used with `getElementsByTagNameNS` to find namespaced
// `content` elements on feed items (presumably the Media RSS extension —
// confirm against the feed specification).
const MEDIA_NS_URI = 'http://search.yahoo.com/mrss/'

function parsePosts(text: TextResponse): OriginPost[] {
let document = text.parse()
Expand All @@ -12,16 +19,28 @@ function parsePosts(text: TextResponse): OriginPost[] {
item.querySelector('guid')?.textContent ??
item.querySelector('link')?.textContent
)
.map(item => ({
full: item.querySelector('description')?.textContent ?? undefined,
media: [],
originId:
item.querySelector('guid')?.textContent ??
item.querySelector('link')!.textContent!,
publishedAt: toTime(item.querySelector('pubDate')?.textContent),
title: item.querySelector('title')?.textContent ?? undefined,
url: item.querySelector('link')?.textContent ?? undefined
}))
.map(item => {
const description = item.querySelector('description')

const descriptionImageElements = description?.querySelectorAll('img')
const descriptionImages = findImageByAttr('src', descriptionImageElements)

const mediaImageElements = [
...item.getElementsByTagNameNS(MEDIA_NS_URI, 'content')
].filter(element => element.getAttribute('medium') === 'image')
const mediaImages = findImageByAttr('url', mediaImageElements)

return {
full: description?.textContent ?? undefined,
media: [...new Set([...descriptionImages, ...mediaImages])],
originId:
item.querySelector('guid')?.textContent ??
item.querySelector('link')!.textContent!,
publishedAt: toTime(item.querySelector('pubDate')?.textContent),
title: item.querySelector('title')?.textContent ?? undefined,
url: item.querySelector('link')?.textContent ?? undefined
}
})
}

export const rss: Loader = {
Expand Down
10 changes: 10 additions & 0 deletions core/loader/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,13 @@ export function toTime(date: null | string | undefined): number | undefined {
return time
}
}

/**
 * Collects the non-empty values of one attribute from a group of elements.
 *
 * @param attr Attribute to read from each element: `src` or `url`.
 * @param elements Elements to inspect. `null` or `undefined` is treated as
 *   an empty collection.
 * @returns Attribute values in iteration order. Elements where the attribute
 *   is missing or empty are skipped; duplicate values are kept.
 */
export function findImageByAttr(
  attr: 'src' | 'url',
  elements?: Iterable<Element> | null
): string[] {
  const urls: string[] = []
  // Append in place so the work stays linear in the number of elements.
  for (const element of elements ?? []) {
    const url = element.getAttribute(attr)
    // A missing attribute yields `null` and an empty one yields ''; skip both.
    if (url) urls.push(url)
  }
  return urls
}

0 comments on commit eb96814

Please sign in to comment.