date parsing enhancements

This commit is contained in:
Egor Aristov 2025-01-26 16:14:45 +03:00
parent f40f912892
commit d54e464816
2 changed files with 24 additions and 2 deletions

View File

@ -7,7 +7,6 @@ import (
"github.com/egor3f/rssalchemy/internal/config" "github.com/egor3f/rssalchemy/internal/config"
"github.com/egor3f/rssalchemy/internal/models" "github.com/egor3f/rssalchemy/internal/models"
"github.com/labstack/gommon/log" "github.com/labstack/gommon/log"
"github.com/markusmobius/go-dateparser"
"github.com/playwright-community/playwright-go" "github.com/playwright-community/playwright-go"
"maps" "maps"
"strings" "strings"
@ -304,7 +303,7 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
createdDateStr := p.must(post.Locator(p.task.SelectorCreated).First().InnerText(defOptInText)) createdDateStr := p.must(post.Locator(p.task.SelectorCreated).First().InnerText(defOptInText))
log.Debugf("date=%s", createdDateStr) log.Debugf("date=%s", createdDateStr)
createdDate, err := dateparser.Parse(nil, createdDateStr) createdDate, err := parseDate(createdDateStr)
if err != nil { if err != nil {
log.Errorf("dateparser: %v", err) log.Errorf("dateparser: %v", err)
} else { } else {

View File

@ -2,6 +2,8 @@ package pwextractor
import ( import (
"fmt" "fmt"
"github.com/markusmobius/go-dateparser"
"github.com/markusmobius/go-dateparser/date"
"github.com/playwright-community/playwright-go" "github.com/playwright-community/playwright-go"
"net/url" "net/url"
"slices" "slices"
@ -81,3 +83,24 @@ func parseCookieString(cookieStr string) ([][2]string, error) {
return result, nil return result, nil
} }
// parseDate parses a free-form date string taken from a scraped page.
//
// The whole trimmed string is tried first. If that fails, leading
// whitespace-separated tokens are dropped one at a time and every remaining
// suffix, down to the final single token, is retried. This lets values such
// as "Posted: 2025-01-26" resolve to the date that follows the prefix.
//
// On success the first successfully parsed date is returned. On failure the
// zero date.Date is returned together with the error from the full-string
// attempt, since that error describes the input the caller actually supplied.
func parseDate(str string) (d date.Date, err error) {
	str = strings.TrimSpace(str)
	// nil configuration: rely on the dateparser library defaults.
	d, err = dateparser.Parse(nil, str)
	if err == nil {
		return d, nil
	}
	fullErr := err

	// Fields (rather than Split on a single space) also splits on tabs and
	// newlines and collapses repeated whitespace, so no empty tokens appear.
	parts := strings.Fields(str)

	// Start at 1: the full string has already been tried above. The loop runs
	// through len(parts)-1 so the last single token is attempted as well.
	for i := 1; i < len(parts); i++ {
		parsed, suffixErr := dateparser.Parse(nil, strings.Join(parts[i:], " "))
		if suffixErr == nil {
			return parsed, nil
		}
	}
	return date.Date{}, fullErr
}