extract from attribute

This commit is contained in:
Egor Aristov 2025-05-06 20:47:10 +03:00
parent f220f9d9d7
commit 0e32cc3f17
Signed by: egor3f
GPG Key ID: 40482A264AAEC85F
3 changed files with 50 additions and 23 deletions

View File

@ -75,6 +75,14 @@ func (h *Handler) handleRender(c echo.Context) error {
return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
}
extractFrom, ok := map[pb.ExtractFrom]models.ExtractFrom{
pb.ExtractFrom_InnerText: models.ExtractFrom_InnerText,
pb.ExtractFrom_Attribute: models.ExtractFrom_Attribute,
}[specs.CreatedExtractFrom]
if !ok {
return echo.NewHTTPError(400, "invalid extract from")
}
task := models.Task{
TaskType: models.TaskTypeExtract,
URL: specs.Url,
@ -84,6 +92,8 @@ func (h *Handler) handleRender(c echo.Context) error {
SelectorDescription: specs.SelectorDescription,
SelectorAuthor: specs.SelectorAuthor,
SelectorCreated: specs.SelectorCreated,
CreatedExtractFrom: extractFrom,
CreatedAttributeName: specs.CreatedAttributeName,
SelectorContent: specs.SelectorContent,
SelectorEnclosure: specs.SelectorEnclosure,
Headers: extractHeaders(c),

View File

@ -114,7 +114,15 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src")
createdDateStr := newLocator(post, p.task.SelectorCreated).First().InnerText()
var createdDateStr string
switch p.task.CreatedExtractFrom {
case models.ExtractFrom_InnerText:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().InnerText()
case models.ExtractFrom_Attribute:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().GetAttribute(p.task.CreatedAttributeName)
default:
return models.FeedItem{}, fmt.Errorf("invalid task.CreatedExtractFrom")
}
log.Debugf("date=%s", createdDateStr)
createdDate, err := p.dateParser.ParseDate(createdDateStr)
if err != nil {

View File

@ -13,6 +13,13 @@ const (
TaskTypePageScreenshot = "page_screenshot"
)
// ExtractFrom selects which part of a matched element a value is read from.
type ExtractFrom int

// Supported extraction sources. The zero value is ExtractFrom_InnerText.
const (
	// ExtractFrom_InnerText reads the element's inner text.
	ExtractFrom_InnerText ExtractFrom = iota
	// ExtractFrom_Attribute reads a named attribute of the element.
	ExtractFrom_Attribute
)
type Task struct {
// While adding new fields, don't forget to alter caching func
TaskType TaskType
@ -23,6 +30,8 @@ type Task struct {
SelectorDescription string
SelectorAuthor string
SelectorCreated string
CreatedExtractFrom ExtractFrom
CreatedAttributeName string
SelectorContent string
SelectorEnclosure string
Headers map[string]string