forked from mmcdole/gofeed
-
Notifications
You must be signed in to change notification settings - Fork 0
/
detector.go
81 lines (72 loc) · 1.69 KB
/
detector.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package gofeed
import (
"bytes"
"io"
"strings"
"github.com/hartza-capital/gofeed/internal/shared"
jsoniter "github.com/json-iterator/go"
xpp "github.com/mmcdole/goxpp"
)
// FeedType identifies one of the feed formats that
// DetectFeedType can report.
type FeedType int
const (
// FeedTypeUnknown represents a feed whose type could not be
// determined. It is the zero value of FeedType.
FeedTypeUnknown FeedType = iota
// FeedTypeAtom represents an Atom feed.
FeedTypeAtom
// FeedTypeRSS represents an RSS feed. DetectFeedType also reports
// this value for documents whose root element is named "rdf".
FeedTypeRSS
// FeedTypeJSON represents a JSON feed.
FeedTypeJSON
)
// DetectFeedType reads feed to the end and reports which kind of feed
// it holds.
//
// Leading whitespace and byte order mark bytes are skipped. If the
// first remaining byte is '<', the document is handed to the XML pull
// parser and classified by the lowercased name of its root element:
// "rss" or "rdf" yields FeedTypeRSS and "feed" yields FeedTypeAtom. If
// it is '{' and the remaining bytes are valid JSON, the result is
// FeedTypeJSON. Every other case, including empty input and an XML
// document for which the root lookup fails, yields FeedTypeUnknown.
func DetectFeedType(feed io.Reader) FeedType {
	// Bytes ignored at the start of the document: ASCII whitespace plus
	// the individual bytes that make up UTF-8, UTF-16 and UTF-32 byte
	// order marks. The match is per byte, not per rune.
	const skippable = " \r\n\t\xFE\xFF\x00\xEF\xBB\xBF"

	var raw bytes.Buffer
	// The read error is not inspected; detection runs on whatever bytes
	// made it into the buffer.
	_, _ = raw.ReadFrom(feed)

	// Drop the ignorable prefix so doc starts at the first significant byte.
	doc := raw.Bytes()
	for len(doc) > 0 && strings.IndexByte(skippable, doc[0]) >= 0 {
		doc = doc[1:]
	}
	if len(doc) == 0 {
		// Empty input, or nothing but whitespace / BOM bytes.
		return FeedTypeUnknown
	}

	switch doc[0] {
	case '<':
		// XML candidate: classify by the root element's name.
		// NOTE(review): shared.FindRoot presumably leaves the parser
		// positioned on the root element so that p.Name is its name —
		// confirm against the shared package.
		p := xpp.NewXMLPullParser(bytes.NewReader(doc), false, shared.NewReaderLabel)
		if _, err := shared.FindRoot(p); err != nil {
			return FeedTypeUnknown
		}
		switch strings.ToLower(p.Name) {
		case "rdf", "rss":
			return FeedTypeRSS
		case "feed":
			return FeedTypeAtom
		}
	case '{':
		// JSON candidate: accept only if the whole document validates.
		if jsoniter.Valid(doc) {
			return FeedTypeJSON
		}
	}
	return FeedTypeUnknown
}