extract from attribute

This commit is contained in:
Egor Aristov 2025-05-06 20:47:10 +03:00
parent f220f9d9d7
commit 0e32cc3f17
Signed by: egor3f
GPG Key ID: 40482A264AAEC85F
3 changed files with 50 additions and 23 deletions

View File

@ -75,18 +75,28 @@ func (h *Handler) handleRender(c echo.Context) error {
return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
}
extractFrom, ok := map[pb.ExtractFrom]models.ExtractFrom{
pb.ExtractFrom_InnerText: models.ExtractFrom_InnerText,
pb.ExtractFrom_Attribute: models.ExtractFrom_Attribute,
}[specs.CreatedExtractFrom]
if !ok {
return echo.NewHTTPError(400, "invalid extract from")
}
task := models.Task{
TaskType: models.TaskTypeExtract,
URL: specs.Url,
SelectorPost: specs.SelectorPost,
SelectorTitle: specs.SelectorTitle,
SelectorLink: specs.SelectorLink,
SelectorDescription: specs.SelectorDescription,
SelectorAuthor: specs.SelectorAuthor,
SelectorCreated: specs.SelectorCreated,
SelectorContent: specs.SelectorContent,
SelectorEnclosure: specs.SelectorEnclosure,
Headers: extractHeaders(c),
TaskType: models.TaskTypeExtract,
URL: specs.Url,
SelectorPost: specs.SelectorPost,
SelectorTitle: specs.SelectorTitle,
SelectorLink: specs.SelectorLink,
SelectorDescription: specs.SelectorDescription,
SelectorAuthor: specs.SelectorAuthor,
SelectorCreated: specs.SelectorCreated,
CreatedExtractFrom: extractFrom,
CreatedAttributeName: specs.CreatedAttributeName,
SelectorContent: specs.SelectorContent,
SelectorEnclosure: specs.SelectorEnclosure,
Headers: extractHeaders(c),
}
cacheLifetime, err := time.ParseDuration(specs.CacheLifetime)

View File

@ -114,7 +114,15 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src")
createdDateStr := newLocator(post, p.task.SelectorCreated).First().InnerText()
var createdDateStr string
switch p.task.CreatedExtractFrom {
case models.ExtractFrom_InnerText:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().InnerText()
case models.ExtractFrom_Attribute:
createdDateStr = newLocator(post, p.task.SelectorCreated).First().GetAttribute(p.task.CreatedAttributeName)
default:
return models.FeedItem{}, fmt.Errorf("invalid task.CreatedExtractFrom")
}
log.Debugf("date=%s", createdDateStr)
createdDate, err := p.dateParser.ParseDate(createdDateStr)
if err != nil {

View File

@ -13,19 +13,28 @@ const (
TaskTypePageScreenshot = "page_screenshot"
)
// ExtractFrom identifies which part of a located element a value is read from.
type ExtractFrom int

// Supported extraction sources. The numeric values are 0 and 1 respectively.
const (
	// ExtractFrom_InnerText reads the value from the element's inner text.
	ExtractFrom_InnerText ExtractFrom = iota
	// ExtractFrom_Attribute reads the value from a named attribute of the element.
	ExtractFrom_Attribute
)
type Task struct {
// When adding new fields, don't forget to update the caching func.
TaskType TaskType
URL string
SelectorPost string
SelectorTitle string
SelectorLink string
SelectorDescription string
SelectorAuthor string
SelectorCreated string
SelectorContent string
SelectorEnclosure string
Headers map[string]string
TaskType TaskType
URL string
SelectorPost string
SelectorTitle string
SelectorLink string
SelectorDescription string
SelectorAuthor string
SelectorCreated string
CreatedExtractFrom ExtractFrom
CreatedAttributeName string
SelectorContent string
SelectorEnclosure string
Headers map[string]string
}
func (t Task) CacheKey() string {