proxy support; config validation
This commit is contained in:
parent
10172d7b7d
commit
94694b2fee
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
|||||||
/.idea/
|
/.idea/
|
||||||
/trash/
|
/trash/
|
||||||
|
/todo.md
|
||||||
|
/.env.dev
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/egor3f/rssalchemy/internal/config"
|
||||||
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
|
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
|
||||||
"github.com/egor3f/rssalchemy/internal/models"
|
"github.com/egor3f/rssalchemy/internal/models"
|
||||||
"github.com/labstack/gommon/log"
|
"github.com/labstack/gommon/log"
|
||||||
@ -11,10 +12,13 @@ func main() {
|
|||||||
log.SetLevel(log.DEBUG)
|
log.SetLevel(log.DEBUG)
|
||||||
log.SetHeader(`${level}`)
|
log.SetHeader(`${level}`)
|
||||||
|
|
||||||
|
// this code is temporary!
|
||||||
|
// todo: rewrite not to use hardcoded tasks
|
||||||
|
|
||||||
task := models.Task{
|
task := models.Task{
|
||||||
URL: "https://vombat.su",
|
URL: "https://2ip.ru",
|
||||||
SelectorPost: "div.post-body",
|
SelectorPost: "div.ip",
|
||||||
SelectorTitle: "h1 a",
|
SelectorTitle: "span",
|
||||||
SelectorLink: "h1 a",
|
SelectorLink: "h1 a",
|
||||||
SelectorDescription: "div.post-content-block p",
|
SelectorDescription: "div.post-content-block p",
|
||||||
SelectorAuthor: "a:has(> span.post-author)",
|
SelectorAuthor: "a:has(> span.post-author)",
|
||||||
@ -23,7 +27,12 @@ func main() {
|
|||||||
SelectorEnclosure: "article img.object-contain",
|
SelectorEnclosure: "article img.object-contain",
|
||||||
}
|
}
|
||||||
|
|
||||||
pwe, err := pwextractor.New()
|
cfg, err := config.Read()
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("read config: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pwe, err := pwextractor.New(cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicf("create pw extractor: %v", err)
|
log.Panicf("create pw extractor: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -8,11 +8,11 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
|
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
|
||||||
|
"github.com/egor3f/rssalchemy/internal/config"
|
||||||
"github.com/egor3f/rssalchemy/internal/models"
|
"github.com/egor3f/rssalchemy/internal/models"
|
||||||
"github.com/ericchiang/css"
|
"github.com/ericchiang/css"
|
||||||
"github.com/go-playground/validator/v10"
|
"github.com/go-playground/validator/v10"
|
||||||
"github.com/gorilla/feeds"
|
"github.com/gorilla/feeds"
|
||||||
"github.com/ilyakaznacheev/cleanenv"
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/labstack/echo/v4/middleware"
|
"github.com/labstack/echo/v4/middleware"
|
||||||
"github.com/labstack/gommon/log"
|
"github.com/labstack/gommon/log"
|
||||||
@ -26,12 +26,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Config struct {
|
|
||||||
WebserverAddress string `yaml:"webserver_address" env:"WEBSERVER_ADDRESS" env-required:"true"`
|
|
||||||
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true"`
|
|
||||||
Debug bool `yaml:"debug" env:"DEBUG"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Specs struct {
|
type Specs struct {
|
||||||
URL string `json:"URL" validate:"url"`
|
URL string `json:"URL" validate:"url"`
|
||||||
SelectorPost string `json:"selector_post" validate:"selector"`
|
SelectorPost string `json:"selector_post" validate:"selector"`
|
||||||
@ -46,8 +40,7 @@ type Specs struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var cfg Config
|
cfg, err := config.Read()
|
||||||
err := cleanenv.ReadConfig("config.yml", &cfg)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicf("reading config failed: %v", err)
|
log.Panicf("reading config failed: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,25 +5,19 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
|
"github.com/egor3f/rssalchemy/internal/adapters/natsadapter"
|
||||||
|
"github.com/egor3f/rssalchemy/internal/config"
|
||||||
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
|
"github.com/egor3f/rssalchemy/internal/extractors/pwextractor"
|
||||||
"github.com/egor3f/rssalchemy/internal/models"
|
"github.com/egor3f/rssalchemy/internal/models"
|
||||||
"github.com/ilyakaznacheev/cleanenv"
|
|
||||||
"github.com/labstack/gommon/log"
|
"github.com/labstack/gommon/log"
|
||||||
"github.com/nats-io/nats.go"
|
"github.com/nats-io/nats.go"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Config struct {
|
|
||||||
NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true"`
|
|
||||||
Debug bool `yaml:"debug" env:"DEBUG"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var cfg Config
|
cfg, err := config.Read()
|
||||||
err := cleanenv.ReadConfig("config.yml", &cfg)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicf("reading config failed: %w", err)
|
log.Panicf("reading config failed: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.Debug {
|
if cfg.Debug {
|
||||||
@ -53,7 +47,7 @@ func main() {
|
|||||||
log.Panicf("create nats adapter: %v", err)
|
log.Panicf("create nats adapter: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pwe, err := pwextractor.New()
|
pwe, err := pwextractor.New(cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicf("create pw extractor: %v", err)
|
log.Panicf("create pw extractor: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
40
internal/config/config.go
Normal file
40
internal/config/config.go
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/go-playground/validator/v10"
|
||||||
|
"github.com/ilyakaznacheev/cleanenv"
|
||||||
|
"net/url"
|
||||||
|
"reflect"
|
||||||
|
"slices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config holds the settings shared by the project's binaries. Values are
// populated from the YAML keys and environment variables named in the struct
// tags, and then checked against the rules in the `validate` tags.
type Config struct {
	// WebserverAddress is required and must validate as "hostname_port".
	WebserverAddress string `yaml:"webserver_address" env:"WEBSERVER_ADDRESS" env-required:"true" validate:"hostname_port"`

	// NatsUrl is required and must validate as a URL.
	NatsUrl string `yaml:"nats_url" env:"NATS_URL" env-required:"true" validate:"url"`

	// Debug is an optional boolean flag.
	Debug bool `yaml:"debug" env:"DEBUG"`

	// Proxy is optional and defaults to the empty string. A non-empty value
	// must pass the custom "proxy" validation rule.
	Proxy string `yaml:"proxy" env:"PROXY" env-default:"" validate:"omitempty,proxy"`
}
|
||||||
|
|
||||||
|
func Read() (Config, error) {
|
||||||
|
var cfg Config
|
||||||
|
err := cleanenv.ReadConfig("config.yml", &cfg)
|
||||||
|
if err != nil {
|
||||||
|
return Config{}, err
|
||||||
|
}
|
||||||
|
validate := validator.New()
|
||||||
|
if err := validate.RegisterValidation("proxy", validateProxy); err != nil {
|
||||||
|
panic(fmt.Errorf("register validation: %w", err))
|
||||||
|
}
|
||||||
|
err = validate.Struct(cfg)
|
||||||
|
return cfg, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func validateProxy(fl validator.FieldLevel) bool {
|
||||||
|
if fl.Field().Kind() != reflect.String {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
validSchemes := []string{"http", "https", "socks"}
|
||||||
|
pUrl, err := url.Parse(fl.Field().String())
|
||||||
|
return err == nil && slices.Contains(validSchemes, pUrl.Scheme) && pUrl.Opaque == "" && pUrl.Path == ""
|
||||||
|
}
|
||||||
@ -3,6 +3,7 @@ package pwextractor
|
|||||||
import (
|
import (
|
||||||
_ "embed"
|
_ "embed"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/egor3f/rssalchemy/internal/config"
|
||||||
"github.com/egor3f/rssalchemy/internal/models"
|
"github.com/egor3f/rssalchemy/internal/models"
|
||||||
"github.com/labstack/gommon/log"
|
"github.com/labstack/gommon/log"
|
||||||
"github.com/markusmobius/go-dateparser"
|
"github.com/markusmobius/go-dateparser"
|
||||||
@ -23,16 +24,21 @@ type PwExtractor struct {
|
|||||||
chrome playwright.Browser
|
chrome playwright.Browser
|
||||||
}
|
}
|
||||||
|
|
||||||
func New() (*PwExtractor, error) {
|
func New(cfg config.Config) (*PwExtractor, error) {
|
||||||
e := PwExtractor{}
|
e := PwExtractor{}
|
||||||
var err error
|
var err error
|
||||||
e.pw, err = playwright.Run()
|
e.pw, err = playwright.Run()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("run playwright: %w", err)
|
return nil, fmt.Errorf("run playwright: %w", err)
|
||||||
}
|
}
|
||||||
|
proxy, err := parseProxy(cfg.Proxy)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("parse proxy: %w", err)
|
||||||
|
}
|
||||||
e.chrome, err = e.pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
e.chrome, err = e.pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
||||||
ChromiumSandbox: playwright.Bool(true),
|
ChromiumSandbox: playwright.Bool(true),
|
||||||
HandleSIGINT: playwright.Bool(false),
|
HandleSIGINT: playwright.Bool(false),
|
||||||
|
Proxy: proxy,
|
||||||
Timeout: pwDuration("5s"),
|
Timeout: pwDuration("5s"),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -30,3 +30,24 @@ func pwDuration(s string) *float64 {
|
|||||||
f64 := float64(dur.Milliseconds())
|
f64 := float64(dur.Milliseconds())
|
||||||
return &f64
|
return &f64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseProxy(s string) (*playwright.Proxy, error) {
|
||||||
|
var proxy *playwright.Proxy
|
||||||
|
if len(s) > 0 {
|
||||||
|
proxyUrl, err := url.Parse(s)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
urlWithoutUser := *proxyUrl
|
||||||
|
urlWithoutUser.User = nil
|
||||||
|
proxy = &playwright.Proxy{Server: urlWithoutUser.String()}
|
||||||
|
if proxyUrl.User != nil {
|
||||||
|
user := proxyUrl.User.Username()
|
||||||
|
proxy.Username = &user
|
||||||
|
if pass, exist := proxyUrl.User.Password(); exist {
|
||||||
|
proxy.Password = &pass
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return proxy, nil
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user