extract from attribute

This commit is contained in:
Egor Aristov 2025-05-06 20:47:10 +03:00
parent f220f9d9d7
commit 0e32cc3f17
Signed by: egor3f
GPG Key ID: 40482A264AAEC85F
3 changed files with 50 additions and 23 deletions

View File

@ -75,6 +75,14 @@ func (h *Handler) handleRender(c echo.Context) error {
return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err)) return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
} }
extractFrom, ok := map[pb.ExtractFrom]models.ExtractFrom{
pb.ExtractFrom_InnerText: models.ExtractFrom_InnerText,
pb.ExtractFrom_Attribute: models.ExtractFrom_Attribute,
}[specs.CreatedExtractFrom]
if !ok {
return echo.NewHTTPError(400, "invalid extract from")
}
task := models.Task{ task := models.Task{
TaskType: models.TaskTypeExtract, TaskType: models.TaskTypeExtract,
URL: specs.Url, URL: specs.Url,
@ -84,6 +92,8 @@ func (h *Handler) handleRender(c echo.Context) error {
SelectorDescription: specs.SelectorDescription, SelectorDescription: specs.SelectorDescription,
SelectorAuthor: specs.SelectorAuthor, SelectorAuthor: specs.SelectorAuthor,
SelectorCreated: specs.SelectorCreated, SelectorCreated: specs.SelectorCreated,
CreatedExtractFrom: extractFrom,
CreatedAttributeName: specs.CreatedAttributeName,
SelectorContent: specs.SelectorContent, SelectorContent: specs.SelectorContent,
SelectorEnclosure: specs.SelectorEnclosure, SelectorEnclosure: specs.SelectorEnclosure,
Headers: extractHeaders(c), Headers: extractHeaders(c),

View File

@ -114,7 +114,15 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src") item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src")
createdDateStr := newLocator(post, p.task.SelectorCreated).First().InnerText() var createdDateStr string
switch p.task.CreatedExtractFrom {
case models.ExtractFrom_InnerText:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().InnerText()
case models.ExtractFrom_Attribute:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().GetAttribute(p.task.CreatedAttributeName)
default:
return models.FeedItem{}, fmt.Errorf("invalid task.CreatedExtractFrom")
}
log.Debugf("date=%s", createdDateStr) log.Debugf("date=%s", createdDateStr)
createdDate, err := p.dateParser.ParseDate(createdDateStr) createdDate, err := p.dateParser.ParseDate(createdDateStr)
if err != nil { if err != nil {

View File

@ -13,6 +13,13 @@ const (
TaskTypePageScreenshot = "page_screenshot" TaskTypePageScreenshot = "page_screenshot"
) )
// ExtractFrom selects which part of a located element a value is read from.
type ExtractFrom int

// Supported extraction sources. The numeric values are 0 and 1, in this order.
const (
	// ExtractFrom_InnerText reads the value from the element's inner text.
	ExtractFrom_InnerText ExtractFrom = iota
	// ExtractFrom_Attribute reads the value from a named attribute of the element.
	ExtractFrom_Attribute
)
type Task struct { type Task struct {
// While adding new fields, dont forget to alter caching func // While adding new fields, dont forget to alter caching func
TaskType TaskType TaskType TaskType
@ -23,6 +30,8 @@ type Task struct {
SelectorDescription string SelectorDescription string
SelectorAuthor string SelectorAuthor string
SelectorCreated string SelectorCreated string
CreatedExtractFrom ExtractFrom
CreatedAttributeName string
SelectorContent string SelectorContent string
SelectorEnclosure string SelectorEnclosure string
Headers map[string]string Headers map[string]string