proxy support; config validation
This commit is contained in:
parent
10172d7b7d
commit
94694b2fee
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
/.idea/
|
||||
/trash/
|
||||
/todo.md
|
||||
/.env.dev
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/egor3f/rssalchemy/internal/config"
|
||||
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
|
||||
"github.com/egor3f/rssalchemy/internal/models"
|
||||
"github.com/labstack/gommon/log"
|
||||
@ -11,10 +12,13 @@ func main() {
|
||||
log.SetLevel(log.DEBUG)
|
||||
log.SetHeader(`${level}`)
|
||||
|
||||
// this code is temporary!
|
||||
// todo: rewrite not to use hardcoded tasks
|
||||
|
||||
task := models.Task{
|
||||
URL: "https://vombat.su",
|
||||
SelectorPost: "div.post-body",
|
||||
SelectorTitle: "h1 a",
|
||||
URL: "https://2ip.ru",
|
||||
SelectorPost: "div.ip",
|
||||
SelectorTitle: "span",
|
||||
SelectorLink: "h1 a",
|
||||
SelectorDescription: "div.post-content-block p",
|
||||
SelectorAuthor: "a:has(> span.post-author)",
|
||||
@ -23,7 +27,12 @@ func main() {
|
||||
SelectorEnclosure: "article img.object-contain",
|
||||
}
|
||||
|
||||
pwe, err := pwextractor.New()
|
||||
cfg, err := config.Read()
|
||||
if err != nil {
|
||||
log.Panicf("read config: %v", err)
|
||||
}
|
||||
|
||||
pwe, err := pwextractor.New(cfg)
|
||||
if err != nil {
|
||||
log.Panicf("create pw extractor: %v", err)
|
||||
}
|
||||
|
||||
@ -8,11 +8,11 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
|
||||
"github.com/egor3f/rssalchemy/internal/config"
|
||||
"github.com/egor3f/rssalchemy/internal/models"
|
||||
"github.com/ericchiang/css"
|
||||
"github.com/go-playground/validator/v10"
|
||||
"github.com/gorilla/feeds"
|
||||
"github.com/ilyakaznacheev/cleanenv"
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/labstack/echo/v4/middleware"
|
||||
"github.com/labstack/gommon/log"
|
||||
@ -26,12 +26,6 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
WebserverAddress string `yaml:"webserver_address" env:"WEBSERVER_ADDRESS" env-required:"true"`
|
||||
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true"`
|
||||
Debug bool `yaml:"debug" env:"DEBUG"`
|
||||
}
|
||||
|
||||
type Specs struct {
|
||||
URL string `json:"URL" validate:"url"`
|
||||
SelectorPost string `json:"selector_post" validate:"selector"`
|
||||
@ -46,8 +40,7 @@ type Specs struct {
|
||||
}
|
||||
|
||||
func main() {
|
||||
var cfg Config
|
||||
err := cleanenv.ReadConfig("config.yml", &cfg)
|
||||
cfg, err := config.Read()
|
||||
if err != nil {
|
||||
log.Panicf("reading config failed: %v", err)
|
||||
}
|
||||
|
||||
@ -5,25 +5,19 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
|
||||
"github.com/egor3f/rssalchemy/internal/config"
|
||||
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
|
||||
"github.com/egor3f/rssalchemy/internal/models"
|
||||
"github.com/ilyakaznacheev/cleanenv"
|
||||
"github.com/labstack/gommon/log"
|
||||
"github.com/nats-io/nats.go"
|
||||
"os"
|
||||
"os/signal"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true"`
|
||||
Debug bool `yaml:"debug" env:"DEBUG"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
var cfg Config
|
||||
err := cleanenv.ReadConfig("config.yml", &cfg)
|
||||
cfg, err := config.Read()
|
||||
if err != nil {
|
||||
log.Panicf("reading config failed: %w", err)
|
||||
log.Panicf("reading config failed: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Debug {
|
||||
@ -53,7 +47,7 @@ func main() {
|
||||
log.Panicf("create nats adapter: %v", err)
|
||||
}
|
||||
|
||||
pwe, err := pwextractor.New()
|
||||
pwe, err := pwextractor.New(cfg)
|
||||
if err != nil {
|
||||
log.Panicf("create pw extractor: %v", err)
|
||||
}
|
||||
|
||||
40
internal/config/config.go
Normal file
40
internal/config/config.go
Normal file
@ -0,0 +1,40 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/go-playground/validator/v10"
|
||||
"github.com/ilyakaznacheev/cleanenv"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"slices"
|
||||
)
|
||||
|
||||
// Config holds the application settings that Read loads from config.yml.
// The yaml and env tags name the file key and the environment variable for
// each field; the validate tags are checked by Read after loading.
type Config struct {
|
||||
// WebserverAddress is marked env-required and must satisfy the validator's
// "hostname_port" rule.
WebserverAddress string `yaml:"webserver_address" env:"WEBSERVER_ADDRESS" env-required:"true" validate:"hostname_port"`
|
||||
// NatsUrl is marked env-required and must satisfy the validator's "url" rule.
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true" validate:"url"`
|
||||
// Debug is an optional flag.
// NOTE(review): its exact effect is decided by the callers and is not
// visible in this file.
Debug bool `yaml:"debug" env:"DEBUG"`
|
||||
// Proxy is an optional proxy URL (default: empty). When non-empty it is
// checked by the custom "proxy" rule registered in Read.
Proxy string `yaml:"proxy" env:"PROXY" env-default:"" validate:"omitempty,proxy"`
|
||||
}
|
||||
|
||||
func Read() (Config, error) {
|
||||
var cfg Config
|
||||
err := cleanenv.ReadConfig("config.yml", &cfg)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
validate := validator.New()
|
||||
if err := validate.RegisterValidation("proxy", validateProxy); err != nil {
|
||||
panic(fmt.Errorf("register validation: %w", err))
|
||||
}
|
||||
err = validate.Struct(cfg)
|
||||
return cfg, err
|
||||
}
|
||||
|
||||
func validateProxy(fl validator.FieldLevel) bool {
|
||||
if fl.Field().Kind() != reflect.String {
|
||||
return false
|
||||
}
|
||||
validSchemes := []string{"http", "https", "socks"}
|
||||
pUrl, err := url.Parse(fl.Field().String())
|
||||
return err == nil && slices.Contains(validSchemes, pUrl.Scheme) && pUrl.Opaque == "" && pUrl.Path == ""
|
||||
}
|
||||
@ -3,6 +3,7 @@ package pwextractor
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"github.com/egor3f/rssalchemy/internal/config"
|
||||
"github.com/egor3f/rssalchemy/internal/models"
|
||||
"github.com/labstack/gommon/log"
|
||||
"github.com/markusmobius/go-dateparser"
|
||||
@ -23,16 +24,21 @@ type PwExtractor struct {
|
||||
chrome playwright.Browser
|
||||
}
|
||||
|
||||
func New() (*PwExtractor, error) {
|
||||
func New(cfg config.Config) (*PwExtractor, error) {
|
||||
e := PwExtractor{}
|
||||
var err error
|
||||
e.pw, err = playwright.Run()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("run playwright: %w", err)
|
||||
}
|
||||
proxy, err := parseProxy(cfg.Proxy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse proxy: %w", err)
|
||||
}
|
||||
e.chrome, err = e.pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
||||
ChromiumSandbox: playwright.Bool(true),
|
||||
HandleSIGINT: playwright.Bool(false),
|
||||
Proxy: proxy,
|
||||
Timeout: pwDuration("5s"),
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@ -30,3 +30,24 @@ func pwDuration(s string) *float64 {
|
||||
f64 := float64(dur.Milliseconds())
|
||||
return &f64
|
||||
}
|
||||
|
||||
func parseProxy(s string) (*playwright.Proxy, error) {
|
||||
var proxy *playwright.Proxy
|
||||
if len(s) > 0 {
|
||||
proxyUrl, err := url.Parse(s)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
urlWithoutUser := *proxyUrl
|
||||
urlWithoutUser.User = nil
|
||||
proxy = &playwright.Proxy{Server: urlWithoutUser.String()}
|
||||
if proxyUrl.User != nil {
|
||||
user := proxyUrl.User.Username()
|
||||
proxy.Username = &user
|
||||
if pass, exist := proxyUrl.User.Password(); exist {
|
||||
proxy.Password = &pass
|
||||
}
|
||||
}
|
||||
}
|
||||
return proxy, nil
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user