proxy support; config validation

This commit is contained in:
Egor Aristov 2025-01-18 12:45:04 +03:00
parent 10172d7b7d
commit 94694b2fee
Signed by: egor3f
GPG Key ID: 40482A264AAEC85F
7 changed files with 89 additions and 24 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
/.idea/ /.idea/
/trash/ /trash/
/todo.md
/.env.dev

View File

@ -1,6 +1,7 @@
package main package main
import ( import (
"github.com/egor3f/rssalchemy/internal/config"
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor" "github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
"github.com/egor3f/rssalchemy/internal/models" "github.com/egor3f/rssalchemy/internal/models"
"github.com/labstack/gommon/log" "github.com/labstack/gommon/log"
@ -11,10 +12,13 @@ func main() {
log.SetLevel(log.DEBUG) log.SetLevel(log.DEBUG)
log.SetHeader(`${level}`) log.SetHeader(`${level}`)
// this code is temporary!
// todo: rewrite not to use hardcoded tasks
task := models.Task{ task := models.Task{
URL: "https://vombat.su", URL: "https://2ip.ru",
SelectorPost: "div.post-body", SelectorPost: "div.ip",
SelectorTitle: "h1 a", SelectorTitle: "span",
SelectorLink: "h1 a", SelectorLink: "h1 a",
SelectorDescription: "div.post-content-block p", SelectorDescription: "div.post-content-block p",
SelectorAuthor: "a:has(> span.post-author)", SelectorAuthor: "a:has(> span.post-author)",
@ -23,7 +27,12 @@ func main() {
SelectorEnclosure: "article img.object-contain", SelectorEnclosure: "article img.object-contain",
} }
pwe, err := pwextractor.New() cfg, err := config.Read()
if err != nil {
log.Panicf("read config: %v", err)
}
pwe, err := pwextractor.New(cfg)
if err != nil { if err != nil {
log.Panicf("create pw extractor: %v", err) log.Panicf("create pw extractor: %v", err)
} }

View File

@ -8,11 +8,11 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter" "github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
"github.com/egor3f/rssalchemy/internal/config"
"github.com/egor3f/rssalchemy/internal/models" "github.com/egor3f/rssalchemy/internal/models"
"github.com/ericchiang/css" "github.com/ericchiang/css"
"github.com/go-playground/validator/v10" "github.com/go-playground/validator/v10"
"github.com/gorilla/feeds" "github.com/gorilla/feeds"
"github.com/ilyakaznacheev/cleanenv"
"github.com/labstack/echo/v4" "github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware" "github.com/labstack/echo/v4/middleware"
"github.com/labstack/gommon/log" "github.com/labstack/gommon/log"
@ -26,12 +26,6 @@ import (
"time" "time"
) )
type Config struct {
WebserverAddress string `yaml:"webserver_address" env:"WEBSERVER_ADDRESS" env-required:"true"`
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true"`
Debug bool `yaml:"debug" env:"DEBUG"`
}
type Specs struct { type Specs struct {
URL string `json:"URL" validate:"url"` URL string `json:"URL" validate:"url"`
SelectorPost string `json:"selector_post" validate:"selector"` SelectorPost string `json:"selector_post" validate:"selector"`
@ -46,8 +40,7 @@ type Specs struct {
} }
func main() { func main() {
var cfg Config cfg, err := config.Read()
err := cleanenv.ReadConfig("config.yml", &cfg)
if err != nil { if err != nil {
log.Panicf("reading config failed: %v", err) log.Panicf("reading config failed: %v", err)
} }

View File

@ -5,25 +5,19 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter" "github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
"github.com/egor3f/rssalchemy/internal/config"
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor" "github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
"github.com/egor3f/rssalchemy/internal/models" "github.com/egor3f/rssalchemy/internal/models"
"github.com/ilyakaznacheev/cleanenv"
"github.com/labstack/gommon/log" "github.com/labstack/gommon/log"
"github.com/nats-io/nats.go" "github.com/nats-io/nats.go"
"os" "os"
"os/signal" "os/signal"
) )
type Config struct {
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true"`
Debug bool `yaml:"debug" env:"DEBUG"`
}
func main() { func main() {
var cfg Config cfg, err := config.Read()
err := cleanenv.ReadConfig("config.yml", &cfg)
if err != nil { if err != nil {
log.Panicf("reading config failed: %w", err) log.Panicf("reading config failed: %v", err)
} }
if cfg.Debug { if cfg.Debug {
@ -53,7 +47,7 @@ func main() {
log.Panicf("create nats adapter: %v", err) log.Panicf("create nats adapter: %v", err)
} }
pwe, err := pwextractor.New() pwe, err := pwextractor.New(cfg)
if err != nil { if err != nil {
log.Panicf("create pw extractor: %v", err) log.Panicf("create pw extractor: %v", err)
} }

40
internal/config/config.go Normal file
View File

@ -0,0 +1,40 @@
package config
import (
"fmt"
"github.com/go-playground/validator/v10"
"github.com/ilyakaznacheev/cleanenv"
"net/url"
"reflect"
"slices"
)
// Config is the application configuration populated by Read.
// Each field carries a `yaml` tag (its key in the config file) and an `env`
// tag (the environment variable name); `validate` tags are checked by Read
// after loading.
type Config struct {
// WebserverAddress is marked required (env-required) and must satisfy the
// validator's "hostname_port" rule.
// NOTE(review): Read is shared by every binary, so this is required even by
// ones that do not start a web server — confirm that is intended.
WebserverAddress string `yaml:"webserver_address" env:"WEBSERVER_ADDRESS" env-required:"true" validate:"hostname_port"`
// NatsUrl is marked required (env-required) and must satisfy the validator's
// "url" rule.
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true" validate:"url"`
// Debug is an optional boolean flag; it has no validation rule.
Debug bool `yaml:"debug" env:"DEBUG"`
// Proxy is optional and defaults to the empty string. Because of "omitempty"
// an empty value skips validation; a non-empty value is checked by the custom
// "proxy" rule that Read registers.
Proxy string `yaml:"proxy" env:"PROXY" env-default:"" validate:"omitempty,proxy"`
}
// Read loads the configuration with cleanenv.ReadConfig from "config.yml"
// (resolved relative to the working directory) and then validates it against
// the `validate` tags on Config, including the custom "proxy" rule.
//
// On any failure it returns the zero Config together with a wrapped error, so
// callers never observe a loaded-but-invalid configuration. The underlying
// error stays reachable through errors.Is / errors.As.
func Read() (Config, error) {
	var cfg Config
	if err := cleanenv.ReadConfig("config.yml", &cfg); err != nil {
		return Config{}, fmt.Errorf("load config: %w", err)
	}

	validate := validator.New()
	// Registration can only fail on a programming mistake (e.g. empty tag),
	// but this is library code, so report it instead of panicking.
	if err := validate.RegisterValidation("proxy", validateProxy); err != nil {
		return Config{}, fmt.Errorf("register validation: %w", err)
	}

	if err := validate.Struct(cfg); err != nil {
		// Do not hand back the rejected values alongside the error.
		return Config{}, fmt.Errorf("validate config: %w", err)
	}
	return cfg, nil
}
// validateProxy implements the custom "proxy" validation rule.
//
// It reports true only when the field is a string that parses as a URL with
//   - a scheme of http, https, socks or socks5,
//   - a non-empty host name,
//   - no opaque part and no path.
//
// User info (user:password@) is allowed; it is not inspected here.
func validateProxy(fl validator.FieldLevel) bool {
	field := fl.Field()
	if field.Kind() != reflect.String {
		return false
	}

	// "socks5" is the spelling used in Playwright's proxy examples; plain
	// "socks" stays accepted so existing configurations keep validating.
	validSchemes := []string{"http", "https", "socks", "socks5"}

	pURL, err := url.Parse(field.String())
	if err != nil {
		return false
	}

	// Hostname() (rather than Host) also rejects values like "http://:3128",
	// which name a port but no server.
	return slices.Contains(validSchemes, pURL.Scheme) &&
		pURL.Hostname() != "" &&
		pURL.Opaque == "" &&
		pURL.Path == ""
}

View File

@ -3,6 +3,7 @@ package pwextractor
import ( import (
_ "embed" _ "embed"
"fmt" "fmt"
"github.com/egor3f/rssalchemy/internal/config"
"github.com/egor3f/rssalchemy/internal/models" "github.com/egor3f/rssalchemy/internal/models"
"github.com/labstack/gommon/log" "github.com/labstack/gommon/log"
"github.com/markusmobius/go-dateparser" "github.com/markusmobius/go-dateparser"
@ -23,16 +24,21 @@ type PwExtractor struct {
chrome playwright.Browser chrome playwright.Browser
} }
func New() (*PwExtractor, error) { func New(cfg config.Config) (*PwExtractor, error) {
e := PwExtractor{} e := PwExtractor{}
var err error var err error
e.pw, err = playwright.Run() e.pw, err = playwright.Run()
if err != nil { if err != nil {
return nil, fmt.Errorf("run playwright: %w", err) return nil, fmt.Errorf("run playwright: %w", err)
} }
proxy, err := parseProxy(cfg.Proxy)
if err != nil {
return nil, fmt.Errorf("parse proxy: %w", err)
}
e.chrome, err = e.pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{ e.chrome, err = e.pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
ChromiumSandbox: playwright.Bool(true), ChromiumSandbox: playwright.Bool(true),
HandleSIGINT: playwright.Bool(false), HandleSIGINT: playwright.Bool(false),
Proxy: proxy,
Timeout: pwDuration("5s"), Timeout: pwDuration("5s"),
}) })
if err != nil { if err != nil {

View File

@ -30,3 +30,24 @@ func pwDuration(s string) *float64 {
f64 := float64(dur.Milliseconds()) f64 := float64(dur.Milliseconds())
return &f64 return &f64
} }
// parseProxy converts a proxy URL string into Playwright proxy settings.
//
// An empty string yields (nil, nil). Otherwise the URL is parsed; its user
// info, if present, is moved into the Username/Password fields and the Server
// field receives the URL with the user info removed. A parse failure is
// returned unchanged.
func parseProxy(s string) (*playwright.Proxy, error) {
	if len(s) == 0 {
		return nil, nil
	}

	parsed, err := url.Parse(s)
	if err != nil {
		return nil, err
	}

	// Work on a copy so the credentials can still be read from parsed below.
	server := *parsed
	server.User = nil
	result := &playwright.Proxy{Server: server.String()}

	creds := parsed.User
	if creds == nil {
		return result, nil
	}

	username := creds.Username()
	result.Username = &username
	if password, ok := creds.Password(); ok {
		result.Password = &password
	}
	return result, nil
}