From 4a806df5ab6d3f460f58d0e961a81af6aa79f0f2 Mon Sep 17 00:00:00 2001 From: Egor Aristov Date: Sat, 25 Jan 2025 12:03:51 +0300 Subject: [PATCH] rewrite extractor script to not use hard-coded task --- .gitignore | 1 + cmd/extractor/extractor.go | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 1703b2c..f8aa798 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /trash/ /todo.md /.env.dev +/task.json diff --git a/cmd/extractor/extractor.go b/cmd/extractor/extractor.go index d7f9890..a5142ed 100644 --- a/cmd/extractor/extractor.go +++ b/cmd/extractor/extractor.go @@ -1,30 +1,33 @@ package main import ( + "encoding/json" "github.com/egor3f/rssalchemy/internal/config" "github.com/egor3f/rssalchemy/internal/extractors/pwextractor" "github.com/egor3f/rssalchemy/internal/models" "github.com/labstack/gommon/log" "github.com/yassinebenaid/godump" + "io" + "os" ) func main() { log.SetLevel(log.DEBUG) log.SetHeader(`${time_rfc3339_nano} ${level}`) - // this code is temporary! - // todo: rewrite not to use hardcoded tasks - - task := models.Task{ - URL: "https://vombat.su", - SelectorPost: "div.post-body", - SelectorTitle: "h1 a", - SelectorLink: "h1 a", - SelectorDescription: "div.post-content-block p", - SelectorAuthor: "a:has(> span.post-author)", - SelectorCreated: "div:nth-of-type(1) > div:nth-of-type(1) > div:nth-of-type(1) > div:nth-of-type(2)", - SelectorContent: "div.post-content-block", - SelectorEnclosure: "article img.object-contain", + taskFile, err := os.Open("task.json") + if err != nil { + log.Panicf("open file: %v", err) + } + //goland:noinspection GoUnhandledErrorResult + defer taskFile.Close() + fileContents, err := io.ReadAll(taskFile) + if err != nil { + log.Panicf("read file: %v", err) + } + var task models.Task + if err := json.Unmarshal(fileContents, &task); err != nil { + log.Panicf("unmarshal task: %v", err) } cfg, err := config.Read()