diff --git a/frontend/wizard/main.css b/frontend/wizard/main.css
index 7b19695..0a35b0f 100644
--- a/frontend/wizard/main.css
+++ b/frontend/wizard/main.css
@@ -1,4 +1,4 @@
-#ready_url_link {
+#ready_url_link, #page_screenshot_link {
visibility: hidden;
}
diff --git a/frontend/wizard/main.js b/frontend/wizard/main.js
index f4a0910..81e7787 100644
--- a/frontend/wizard/main.js
+++ b/frontend/wizard/main.js
@@ -47,24 +47,35 @@ function displayUrl(url) {
}
function baseUrl() {
- return document.location.origin + '/api/v1/render/';
+ return document.location.origin + '/api/v1';
}
async function genUrl() {
let specs = readSpecsForm();
let encodedSpecs = await encodeSpecs(specs);
- let url = baseUrl() + encodedSpecs;
+ let url = baseUrl() + '/render/' + encodedSpecs;
displayUrl(url);
}
async function editUrl() {
let url = document.getElementById('url_input').value;
- let specs = await decodeSpecs(url.replace(baseUrl(), ''));
+ let specs = await decodeSpecs(url.replace(baseUrl() + '/render/', ''));
writeSpecsToForm(specs);
displayUrl(url);
}
+function onUrlInput() {
+ let url = document.forms['wizard'].elements['url'].value;
+ if (url.trim().length > 0) {
+ document.getElementById('page_screenshot_link').style.visibility = 'visible';
+ document.getElementById('page_screenshot_link').href = `${baseUrl()}/screenshot?url=${url}`;
+ } else {
+ document.getElementById('page_screenshot_link').style.visibility = 'hidden';
+ }
+}
+
document.addEventListener('DOMContentLoaded', ev => {
document.getElementById('btn_gen_url').addEventListener('click', genUrl);
document.getElementById('btn_edit').addEventListener('click', editUrl);
+ document.getElementById('w_url').addEventListener('input', onUrlInput);
});
diff --git a/internal/delivery/http/handler.go b/internal/delivery/http/handler.go
index d16b33c..9a33820 100644
--- a/internal/delivery/http/handler.go
+++ b/internal/delivery/http/handler.go
@@ -16,9 +16,16 @@ import (
"github.com/labstack/gommon/log"
"html"
"io"
+ "net/url"
"time"
)
+const (
+ taskTimeout = 20 * time.Second
+ minLifetime = taskTimeout
+ maxLifetime = 24 * time.Hour
+)
+
type Handler struct {
validate *validator.Validate
CachedQueue adapters.CachedWorkQueue
@@ -35,6 +42,7 @@ func New(cq adapters.CachedWorkQueue) *Handler {
func (h *Handler) SetupRoutes(g *echo.Group) {
g.GET("/render/:specs", h.handleRender)
+ g.GET("/screenshot", h.handlePageScreenshot)
}
type Specs struct {
@@ -58,6 +66,7 @@ func (h *Handler) handleRender(c echo.Context) error {
}
task := models.Task{
+ TaskType: models.TaskTypeExtract,
URL: specs.URL,
SelectorPost: specs.SelectorPost,
SelectorTitle: specs.SelectorTitle,
@@ -69,9 +78,6 @@ func (h *Handler) handleRender(c echo.Context) error {
SelectorEnclosure: specs.SelectorEnclosure,
}
- taskTimeout := 20 * time.Second
- minLifetime := taskTimeout
- maxLifetime := 24 * time.Hour
cacheLifetime, err := time.ParseDuration(specs.CacheLifetime)
if err != nil {
return echo.NewHTTPError(400, "invalid cache lifetime")
@@ -98,7 +104,6 @@ func (h *Handler) handleRender(c echo.Context) error {
var result models.TaskResult
if err := json.Unmarshal(taskResultBytes, &result); err != nil {
- log.Errorf("cached value unmarshal failed: %v", err)
return echo.NewHTTPError(500, fmt.Errorf("cached value unmarshal failed: %v", err))
}
@@ -112,6 +117,38 @@ func (h *Handler) handleRender(c echo.Context) error {
return c.String(200, atom)
}
+func (h *Handler) handlePageScreenshot(c echo.Context) error {
+ pageUrl := c.QueryParam("url")
+ if _, err := url.Parse(pageUrl); err != nil {
+ return echo.NewHTTPError(400, "url is invalid or missing")
+ }
+
+ task := models.Task{
+ TaskType: models.TaskTypePageScreenshot,
+ URL: pageUrl,
+ }
+
+ timeoutCtx, cancel := context.WithTimeout(context.Background(), taskTimeout)
+ defer cancel()
+
+ encodedTask, err := json.Marshal(task)
+ if err != nil {
+ return echo.NewHTTPError(500, fmt.Errorf("task marshal error: %v", err))
+ }
+
+ cacheLifetime := minLifetime
+ taskResultBytes, err := h.CachedQueue.ProcessWorkCached(timeoutCtx, cacheLifetime, task.CacheKey(), encodedTask)
+ if err != nil {
+ return echo.NewHTTPError(500, fmt.Errorf("queued cache failed: %v", err))
+ }
+
+ var result models.ScreenshotTaskResult
+ if err := json.Unmarshal(taskResultBytes, &result); err != nil {
+ return echo.NewHTTPError(500, fmt.Errorf("task result unmarshal failed: %v", err))
+ }
+ return c.Blob(200, "image/png", result.Image)
+}
+
func (h *Handler) decodeSpecs(specsParam string) (Specs, error) {
decodedSpecsParam, err := base64.StdEncoding.WithPadding(base64.NoPadding).DecodeString(specsParam)
if err != nil {
diff --git a/internal/extractors/pwextractor/pwextractor.go b/internal/extractors/pwextractor/pwextractor.go
index a6c5ef9..99885a5 100644
--- a/internal/extractors/pwextractor/pwextractor.go
+++ b/internal/extractors/pwextractor/pwextractor.go
@@ -58,10 +58,10 @@ func (e *PwExtractor) Stop() error {
return nil
}
-func (e *PwExtractor) Extract(task models.Task) (result *models.TaskResult, errRet error) {
+func (e *PwExtractor) visitPage(pageUrl string, cb func(page playwright.Page) error) (errRet error) {
page, err := e.chrome.NewPage()
if err != nil {
- return nil, fmt.Errorf("browser new page: %w", err)
+ return fmt.Errorf("browser new page: %w", err)
}
defer func() {
err := page.Close()
@@ -71,25 +71,58 @@ func (e *PwExtractor) Extract(task models.Task) (result *models.TaskResult, errR
}()
log.Debugf("Page opened")
- if _, err := page.Goto(task.URL); err != nil {
- return nil, fmt.Errorf("goto page: %w", err)
+ if _, err := page.Goto(pageUrl); err != nil {
+ return fmt.Errorf("goto page: %w", err)
}
- log.Debugf("Url %s visited", task.URL)
+ log.Debugf("Url %s visited", pageUrl)
+ defer log.Debugf("Visiting page %s finished", pageUrl)
- parser := pageParser{
- task: task,
- page: page,
- }
+ return cb(page)
+}
- result, err = parser.parse()
- if err != nil {
- return nil, fmt.Errorf("parse page: %w", err)
- }
- if len(result.Items) == 0 {
- return nil, fmt.Errorf("extract failed for all posts")
- }
+func (e *PwExtractor) Extract(task models.Task) (result *models.TaskResult, errRet error) {
+ errRet = e.visitPage(task.URL, func(page playwright.Page) error {
+ parser := pageParser{
+ task: task,
+ page: page,
+ }
+ var err error
+ result, err = parser.parse()
+ if err != nil {
+ return fmt.Errorf("parse page: %w", err)
+ }
+ if len(result.Items) == 0 {
+ return fmt.Errorf("extract failed for all posts")
+ }
+ return nil
+ })
+ return
+}
- return result, nil
+func (e *PwExtractor) Screenshot(task models.Task) (result *models.ScreenshotTaskResult, errRet error) {
+ errRet = e.visitPage(task.URL, func(page playwright.Page) error {
+ err := page.WaitForLoadState(playwright.PageWaitForLoadStateOptions{
+ State: playwright.LoadStateNetworkidle,
+ Timeout: pwDuration("5s"),
+ })
+ if err != nil {
+ log.Debugf("Wait for network idle: %w", err)
+ }
+ if err := page.SetViewportSize(1280, 800); err != nil {
+ return fmt.Errorf("set viewport size: %w", err)
+ }
+ screenshot, err := page.Screenshot(playwright.PageScreenshotOptions{
+ Animations: playwright.ScreenshotAnimationsDisabled,
+ Timeout: pwDuration("5s"),
+ })
+ if err != nil {
+ return fmt.Errorf("make screenshot: %w", err)
+ }
+ log.Infof("Screenshot finished; total size: %d bytes", len(screenshot))
+ result = &models.ScreenshotTaskResult{Image: screenshot}
+ return nil
+ })
+ return
}
type pageParser struct {
diff --git a/internal/models/models.go b/internal/models/models.go
index 50a0757..689dbe9 100644
--- a/internal/models/models.go
+++ b/internal/models/models.go
@@ -6,8 +6,16 @@ import (
"time"
)
+type TaskType string
+
+const (
+ TaskTypeExtract = "extract"
+ TaskTypePageScreenshot = "page_screenshot"
+)
+
type Task struct {
// While adding new fields, dont forget to alter caching func
+ TaskType TaskType
URL string
SelectorPost string
SelectorTitle string
@@ -30,7 +38,7 @@ func (t Task) CacheKey() string {
h.Write([]byte(t.SelectorCreated))
h.Write([]byte(t.SelectorContent))
h.Write([]byte(t.SelectorEnclosure))
- return fmt.Sprintf("%x", h.Sum(nil))
+ return fmt.Sprintf("%s_%x", t.TaskType, h.Sum(nil))
}
type FeedItem struct {
@@ -50,3 +58,7 @@ type TaskResult struct {
Items []FeedItem
Icon string
}
+
+type ScreenshotTaskResult struct {
+ Image []byte // png
+}