extract from attribute

This commit is contained in:
Egor Aristov 2025-05-06 20:47:10 +03:00
parent f220f9d9d7
commit 0e32cc3f17
Signed by: egor3f
GPG Key ID: 40482A264AAEC85F
3 changed files with 50 additions and 23 deletions

View File

@ -75,18 +75,28 @@ func (h *Handler) handleRender(c echo.Context) error {
return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err)) return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
} }
extractFrom, ok := map[pb.ExtractFrom]models.ExtractFrom{
pb.ExtractFrom_InnerText: models.ExtractFrom_InnerText,
pb.ExtractFrom_Attribute: models.ExtractFrom_Attribute,
}[specs.CreatedExtractFrom]
if !ok {
return echo.NewHTTPError(400, "invalid extract from")
}
task := models.Task{ task := models.Task{
TaskType: models.TaskTypeExtract, TaskType: models.TaskTypeExtract,
URL: specs.Url, URL: specs.Url,
SelectorPost: specs.SelectorPost, SelectorPost: specs.SelectorPost,
SelectorTitle: specs.SelectorTitle, SelectorTitle: specs.SelectorTitle,
SelectorLink: specs.SelectorLink, SelectorLink: specs.SelectorLink,
SelectorDescription: specs.SelectorDescription, SelectorDescription: specs.SelectorDescription,
SelectorAuthor: specs.SelectorAuthor, SelectorAuthor: specs.SelectorAuthor,
SelectorCreated: specs.SelectorCreated, SelectorCreated: specs.SelectorCreated,
SelectorContent: specs.SelectorContent, CreatedExtractFrom: extractFrom,
SelectorEnclosure: specs.SelectorEnclosure, CreatedAttributeName: specs.CreatedAttributeName,
Headers: extractHeaders(c), SelectorContent: specs.SelectorContent,
SelectorEnclosure: specs.SelectorEnclosure,
Headers: extractHeaders(c),
} }
cacheLifetime, err := time.ParseDuration(specs.CacheLifetime) cacheLifetime, err := time.ParseDuration(specs.CacheLifetime)

View File

@ -114,7 +114,15 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src") item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src")
createdDateStr := newLocator(post, p.task.SelectorCreated).First().InnerText() var createdDateStr string
switch p.task.CreatedExtractFrom {
case models.ExtractFrom_InnerText:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().InnerText()
case models.ExtractFrom_Attribute:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().GetAttribute(p.task.CreatedAttributeName)
default:
return models.FeedItem{}, fmt.Errorf("invalid task.CreatedExtractFrom")
}
log.Debugf("date=%s", createdDateStr) log.Debugf("date=%s", createdDateStr)
createdDate, err := p.dateParser.ParseDate(createdDateStr) createdDate, err := p.dateParser.ParseDate(createdDateStr)
if err != nil { if err != nil {

View File

@ -13,19 +13,28 @@ const (
TaskTypePageScreenshot = "page_screenshot" TaskTypePageScreenshot = "page_screenshot"
) )
// ExtractFrom selects which part of a matched element a value is read from.
type ExtractFrom int

// The numeric values are spelled out explicitly so that each one stays fixed
// if more sources are added later.
const (
	// ExtractFrom_InnerText takes the value from the element's inner text.
	ExtractFrom_InnerText = ExtractFrom(0)
	// ExtractFrom_Attribute takes the value from one of the element's attributes.
	ExtractFrom_Attribute = ExtractFrom(1)
)
type Task struct { type Task struct {
// While adding new fields, dont forget to alter caching func // While adding new fields, dont forget to alter caching func
TaskType TaskType TaskType TaskType
URL string URL string
SelectorPost string SelectorPost string
SelectorTitle string SelectorTitle string
SelectorLink string SelectorLink string
SelectorDescription string SelectorDescription string
SelectorAuthor string SelectorAuthor string
SelectorCreated string SelectorCreated string
SelectorContent string CreatedExtractFrom ExtractFrom
SelectorEnclosure string CreatedAttributeName string
Headers map[string]string SelectorContent string
SelectorEnclosure string
Headers map[string]string
} }
func (t Task) CacheKey() string { func (t Task) CacheKey() string {