extract from attribute
This commit is contained in:
parent
c47f058f3b
commit
129e95f556
@ -75,6 +75,14 @@ func (h *Handler) handleRender(c echo.Context) error {
|
|||||||
return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
|
return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extractFrom, ok := map[pb.ExtractFrom]models.ExtractFrom{
|
||||||
|
pb.ExtractFrom_InnerText: models.ExtractFrom_InnerText,
|
||||||
|
pb.ExtractFrom_Attribute: models.ExtractFrom_Attribute,
|
||||||
|
}[specs.CreatedExtractFrom]
|
||||||
|
if !ok {
|
||||||
|
return echo.NewHTTPError(400, "invalid extract from")
|
||||||
|
}
|
||||||
|
|
||||||
task := models.Task{
|
task := models.Task{
|
||||||
TaskType: models.TaskTypeExtract,
|
TaskType: models.TaskTypeExtract,
|
||||||
URL: specs.Url,
|
URL: specs.Url,
|
||||||
@ -84,6 +92,8 @@ func (h *Handler) handleRender(c echo.Context) error {
|
|||||||
SelectorDescription: specs.SelectorDescription,
|
SelectorDescription: specs.SelectorDescription,
|
||||||
SelectorAuthor: specs.SelectorAuthor,
|
SelectorAuthor: specs.SelectorAuthor,
|
||||||
SelectorCreated: specs.SelectorCreated,
|
SelectorCreated: specs.SelectorCreated,
|
||||||
|
CreatedExtractFrom: extractFrom,
|
||||||
|
CreatedAttributeName: specs.CreatedAttributeName,
|
||||||
SelectorContent: specs.SelectorContent,
|
SelectorContent: specs.SelectorContent,
|
||||||
SelectorEnclosure: specs.SelectorEnclosure,
|
SelectorEnclosure: specs.SelectorEnclosure,
|
||||||
Headers: extractHeaders(c),
|
Headers: extractHeaders(c),
|
||||||
|
|||||||
@ -114,7 +114,15 @@ func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, erro
|
|||||||
|
|
||||||
item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src")
|
item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src")
|
||||||
|
|
||||||
createdDateStr := newLocator(post, p.task.SelectorCreated).First().InnerText()
|
var createdDateStr string
|
||||||
|
switch p.task.CreatedExtractFrom {
|
||||||
|
case models.ExtractFrom_InnerText:
|
||||||
|
createdDateStr = newLocator(post, p.task.SelectorCreated).First().InnerText()
|
||||||
|
case models.ExtractFrom_Attribute:
|
||||||
|
createdDateStr = newLocator(post, p.task.SelectorCreated).First().GetAttribute(p.task.CreatedAttributeName)
|
||||||
|
default:
|
||||||
|
return models.FeedItem{}, fmt.Errorf("invalid task.CreatedExtractFrom")
|
||||||
|
}
|
||||||
log.Debugf("date=%s", createdDateStr)
|
log.Debugf("date=%s", createdDateStr)
|
||||||
createdDate, err := p.dateParser.ParseDate(createdDateStr)
|
createdDate, err := p.dateParser.ParseDate(createdDateStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -13,6 +13,13 @@ const (
|
|||||||
TaskTypePageScreenshot = "page_screenshot"
|
TaskTypePageScreenshot = "page_screenshot"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type ExtractFrom int
|
||||||
|
|
||||||
|
const (
|
||||||
|
ExtractFrom_InnerText ExtractFrom = 0
|
||||||
|
ExtractFrom_Attribute ExtractFrom = 1
|
||||||
|
)
|
||||||
|
|
||||||
type Task struct {
|
type Task struct {
|
||||||
// While adding new fields, dont forget to alter caching func
|
// While adding new fields, dont forget to alter caching func
|
||||||
TaskType TaskType
|
TaskType TaskType
|
||||||
@ -23,6 +30,8 @@ type Task struct {
|
|||||||
SelectorDescription string
|
SelectorDescription string
|
||||||
SelectorAuthor string
|
SelectorAuthor string
|
||||||
SelectorCreated string
|
SelectorCreated string
|
||||||
|
CreatedExtractFrom ExtractFrom
|
||||||
|
CreatedAttributeName string
|
||||||
SelectorContent string
|
SelectorContent string
|
||||||
SelectorEnclosure string
|
SelectorEnclosure string
|
||||||
Headers map[string]string
|
Headers map[string]string
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user