Skip to content

Commit 72eeeeb

Browse files
committed
Fall back to a feed's updated date when items lack a published date
1 parent f4be7f7 commit 72eeeeb

2 files changed

Lines changed: 73 additions & 10 deletions

File tree

backend/feed/feed.go

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,23 @@ func NormalizePostURL(blogURL, postURL string) string {
4949
return url
5050
}
5151

52+
func DeterminePublishedAt(feed *gofeed.Feed, item *gofeed.Item, now time.Time) time.Time {
53+
// Default the published date to now (though it will likely get overwritten).
54+
publishedAt := now
55+
56+
// If the feed has an updated date, use it (better than now).
57+
if feed.UpdatedParsed != nil {
58+
publishedAt = *feed.UpdatedParsed
59+
}
60+
61+
// If the item has a published date, use it (better than the feed's updated date).
62+
if item.PublishedParsed != nil {
63+
publishedAt = *item.PublishedParsed
64+
}
65+
66+
return publishedAt.UTC().Round(time.Microsecond)
67+
}
68+
5269
func Parse(feedURL string, feedBody string) (Blog, error) {
5370
fp := gofeed.NewParser()
5471
feed, err := fp.ParseString(feedBody)
@@ -58,21 +75,13 @@ func Parse(feedURL string, feedBody string) (Blog, error) {
5875

5976
var posts []Post
6077
for _, item := range feed.Items {
61-
// skip items without a link or title
78+
// Skip items without a link or title.
6279
if item.Link == "" || item.Title == "" {
6380
continue
6481
}
6582

6683
url := NormalizePostURL(feed.Link, item.Link)
67-
68-
// check for a publish date, else default to now
69-
publishedAt := time.Now()
70-
if item.PublishedParsed != nil {
71-
publishedAt = *item.PublishedParsed
72-
}
73-
74-
// ensure publishedAt is in UTC
75-
publishedAt = publishedAt.UTC().Round(time.Microsecond)
84+
publishedAt := DeterminePublishedAt(feed, item, time.Now())
7685

7786
post := Post{
7887
URL: url,

backend/feed/feed_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"testing"
55
"time"
66

7+
"github.com/mmcdole/gofeed"
78
"github.com/theandrew168/bloggulus/backend/feed"
89
feedMock "github.com/theandrew168/bloggulus/backend/feed/mock"
910
"github.com/theandrew168/bloggulus/backend/test"
@@ -30,6 +31,59 @@ func TestNormalizePostURL(t *testing.T) {
3031
}
3132
}
3233

34+
func TestDeterminePublishedAt(t *testing.T) {
35+
t.Parallel()
36+
37+
feedUpdatedParsed := time.Now().AddDate(0, 0, -1)
38+
itemPublishedParsed := time.Now().AddDate(0, 0, -2)
39+
now := time.Now()
40+
41+
tests := []struct {
42+
feedUpdatedParsed *time.Time
43+
itemPublishedParsed *time.Time
44+
now time.Time
45+
want time.Time
46+
}{
47+
{
48+
// Without any feed or item published date, use now.
49+
feedUpdatedParsed: nil,
50+
itemPublishedParsed: nil,
51+
now: now,
52+
want: now.UTC().Round(time.Microsecond),
53+
},
54+
{
55+
// If the feed has an updated date, use it.
56+
feedUpdatedParsed: &feedUpdatedParsed,
57+
itemPublishedParsed: nil,
58+
now: now,
59+
want: feedUpdatedParsed.UTC().Round(time.Microsecond),
60+
},
61+
{
62+
// If the item has a published date, use it.
63+
feedUpdatedParsed: nil,
64+
itemPublishedParsed: &itemPublishedParsed,
65+
now: now,
66+
want: itemPublishedParsed.UTC().Round(time.Microsecond),
67+
},
68+
{
69+
// If item has a published date, use it even if the feed has an updated date.
70+
feedUpdatedParsed: &feedUpdatedParsed,
71+
itemPublishedParsed: &itemPublishedParsed,
72+
now: now,
73+
want: itemPublishedParsed.UTC().Round(time.Microsecond),
74+
},
75+
}
76+
77+
for _, tt := range tests {
78+
got := feed.DeterminePublishedAt(
79+
&gofeed.Feed{UpdatedParsed: tt.feedUpdatedParsed},
80+
&gofeed.Item{PublishedParsed: tt.itemPublishedParsed},
81+
tt.now,
82+
)
83+
test.AssertEqual(t, got, tt.want)
84+
}
85+
}
86+
3387
func TestParse(t *testing.T) {
3488
t.Parallel()
3589

0 commit comments

Comments
 (0)