117 lines
2.8 KiB
Go
117 lines
2.8 KiB
Go
package pwextractor
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/jellydator/ttlcache/v3"
|
|
"github.com/playwright-community/playwright-go"
|
|
"net"
|
|
"net/url"
|
|
"slices"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
func absUrl(link string, page playwright.Page) string {
|
|
if len(link) == 0 {
|
|
return ""
|
|
}
|
|
if strings.HasPrefix(link, "/") {
|
|
pageUrl, _ := url.Parse(page.URL())
|
|
link = fmt.Sprintf("%s://%s%s", pageUrl.Scheme, pageUrl.Host, link)
|
|
}
|
|
//log.Debugf("link=%s", link)
|
|
return link
|
|
}
|
|
|
|
// pwDuration parses a Go duration literal such as "10s" and returns a pointer
// to the equivalent number of milliseconds as a float64, which is the form
// Playwright expects for its timeout options.
//
// The value is taken from time.Duration.Milliseconds, so any sub-millisecond
// remainder is dropped. Each call returns a pointer to a fresh value.
//
// It panics if s is not a valid duration string.
func pwDuration(s string) *float64 {
	parsed, parseErr := time.ParseDuration(s)
	if parseErr != nil {
		panic(fmt.Errorf("failed to parse duration %s: %w", s, parseErr))
	}

	millis := new(float64)
	*millis = float64(parsed.Milliseconds())
	return millis
}
|
|
|
|
func parseProxy(s string) (*playwright.Proxy, error) {
|
|
var proxy *playwright.Proxy
|
|
if len(s) > 0 {
|
|
proxyUrl, err := url.Parse(s)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
urlWithoutUser := *proxyUrl
|
|
urlWithoutUser.User = nil
|
|
proxy = &playwright.Proxy{Server: urlWithoutUser.String()}
|
|
if proxyUrl.User != nil {
|
|
user := proxyUrl.User.Username()
|
|
proxy.Username = &user
|
|
if pass, exist := proxyUrl.User.Password(); exist {
|
|
proxy.Password = &pass
|
|
}
|
|
}
|
|
}
|
|
return proxy, nil
|
|
}
|
|
|
|
// parseBaseDomain extracts the base domain and scheme from urlStr.
//
// The base domain is the last two dot-separated labels of the URL's host
// ("https://www.example.com/x" -> "example.com"). This is a purely textual
// rule: no public-suffix list is consulted, and a port present in the host is
// kept as part of the last label. A host consisting of a single label (for
// example "localhost") is returned as-is.
//
// Errors are returned when urlStr cannot be parsed, when the scheme is not
// "http" or "https", or when the URL has no host.
func parseBaseDomain(urlStr string) (domain string, scheme string, err error) {
	pageURL, err := url.Parse(urlStr)
	if err != nil {
		return "", "", fmt.Errorf("task url parsing: %w", err)
	}

	scheme = pageURL.Scheme
	if !slices.Contains([]string{"https", "http"}, scheme) {
		return "", "", fmt.Errorf("bad scheme: %s", scheme)
	}

	if pageURL.Host == "" {
		return "", "", fmt.Errorf("empty host in url: %s", urlStr)
	}

	labels := strings.Split(pageURL.Host, ".")
	if len(labels) < 2 {
		// Single-label host: there is no second label to join with, so
		// indexing labels[len-2] would be out of range.
		return pageURL.Host, scheme, nil
	}
	return labels[len(labels)-2] + "." + labels[len(labels)-1], scheme, nil
}
|
|
|
|
var dnsCache *ttlcache.Cache[string, []net.IP]
|
|
|
|
func init() {
|
|
dnsCache = ttlcache.New[string, []net.IP](
|
|
ttlcache.WithTTL[string, []net.IP](1*time.Minute),
|
|
ttlcache.WithDisableTouchOnHit[string, []net.IP](),
|
|
)
|
|
go dnsCache.Start()
|
|
}
|
|
|
|
// getIPs from url, hostname, ip string
|
|
// result slice len always > 0 if error is nil
|
|
func getIPs(host string) ([]net.IP, error) {
|
|
ip := net.ParseIP(host)
|
|
if ip != nil {
|
|
return []net.IP{ip}, nil
|
|
}
|
|
|
|
urlStruct, err := url.Parse(host)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("url parse: %w", err)
|
|
}
|
|
if len(urlStruct.Host) > 0 {
|
|
host = urlStruct.Hostname()
|
|
ip = net.ParseIP(host)
|
|
if ip != nil {
|
|
return []net.IP{ip}, nil
|
|
}
|
|
}
|
|
|
|
var ips []net.IP
|
|
if dnsCache.Has(host) {
|
|
ips = dnsCache.Get(host).Value()
|
|
} else {
|
|
ips, err = net.LookupIP(host)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("lookup ip: %w", err)
|
|
}
|
|
dnsCache.Set(host, ips, ttlcache.DefaultTTL)
|
|
}
|
|
if len(ips) == 0 {
|
|
return nil, fmt.Errorf("lookip ip: not resolved")
|
|
}
|
|
return ips, nil
|
|
}
|