From b1d28b62e5d3766f6db37af274198f8d00a92efe Mon Sep 17 00:00:00 2001 From: Egor Aristov Date: Sun, 2 Feb 2025 15:33:49 +0300 Subject: [PATCH] fix: skip posts with empty date --- internal/dateparser/dateparser.go | 5 +++++ internal/extractors/pwextractor/pwextractor.go | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/dateparser/dateparser.go b/internal/dateparser/dateparser.go index 36ac2ca..e958de3 100644 --- a/internal/dateparser/dateparser.go +++ b/internal/dateparser/dateparser.go @@ -1,6 +1,7 @@ package dateparser import ( + "fmt" godateparser "github.com/markusmobius/go-dateparser" "strings" "time" @@ -13,6 +14,10 @@ type DateParser struct { func (d *DateParser) ParseDate(str string) (time.Time, error) { str = strings.TrimSpace(str) + if len(str) == 0 { + return time.Time{}, fmt.Errorf("date string is empty") + } + dt, err := godateparser.Parse(&godateparser.Configuration{ CurrentTime: d.CurrentTimeFunc(), }, str) diff --git a/internal/extractors/pwextractor/pwextractor.go b/internal/extractors/pwextractor/pwextractor.go index 63c465b..9856282 100644 --- a/internal/extractors/pwextractor/pwextractor.go +++ b/internal/extractors/pwextractor/pwextractor.go @@ -287,7 +287,7 @@ func (p *pageParser) parse() (*models.TaskResult, error) { log.Errorf("extract post fields: %v", err) continue } - if len(item.Title) == 0 || len(item.Link) == 0 { + if len(item.Title) == 0 || len(item.Link) == 0 || item.Created.IsZero() { log.Warnf("post has no required fields, skip") continue }