date parsing enhancements
This commit is contained in:
parent
7069d8803f
commit
0d48fe8554
@ -7,7 +7,6 @@ import (
|
||||
"github.com/egor3f/rssalchemy/internal/config"
|
||||
"github.com/egor3f/rssalchemy/internal/models"
|
||||
"github.com/labstack/gommon/log"
|
||||
"github.com/markusmobius/go-dateparser"
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"maps"
|
||||
"strings"
|
||||
@ -304,7 +303,7 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
|
||||
|
||||
createdDateStr := p.must(post.Locator(p.task.SelectorCreated).First().InnerText(defOptInText))
|
||||
log.Debugf("date=%s", createdDateStr)
|
||||
createdDate, err := dateparser.Parse(nil, createdDateStr)
|
||||
createdDate, err := parseDate(createdDateStr)
|
||||
if err != nil {
|
||||
log.Errorf("dateparser: %v", err)
|
||||
} else {
|
||||
|
||||
@ -2,6 +2,8 @@ package pwextractor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/markusmobius/go-dateparser"
|
||||
"github.com/markusmobius/go-dateparser/date"
|
||||
"github.com/playwright-community/playwright-go"
|
||||
"net/url"
|
||||
"slices"
|
||||
@ -81,3 +83,24 @@ func parseCookieString(cookieStr string) ([][2]string, error) {
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func parseDate(str string) (d date.Date, err error) {
|
||||
str = strings.TrimSpace(str)
|
||||
|
||||
d, err = dateparser.Parse(nil, str)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
parts := strings.Split(str, " ")
|
||||
for len(parts) > 1 {
|
||||
newStr := strings.Join(parts, " ")
|
||||
d, err = dateparser.Parse(nil, newStr)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
parts = parts[1:]
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user