diff --git a/cmd/extractor/extractor.go b/cmd/extractor/extractor.go index 41159cd..d7f9890 100644 --- a/cmd/extractor/extractor.go +++ b/cmd/extractor/extractor.go @@ -16,9 +16,9 @@ func main() { // todo: rewrite not to use hardcoded tasks task := models.Task{ - URL: "https://2ip.ru", - SelectorPost: "div.ip", - SelectorTitle: "span", + URL: "https://vombat.su", + SelectorPost: "div.post-body", + SelectorTitle: "h1 a", SelectorLink: "h1 a", SelectorDescription: "div.post-content-block p", SelectorAuthor: "a:has(> span.post-author)", diff --git a/internal/extractors/pwextractor/pwextractor.go b/internal/extractors/pwextractor/pwextractor.go index 1773fc7..18e346c 100644 --- a/internal/extractors/pwextractor/pwextractor.go +++ b/internal/extractors/pwextractor/pwextractor.go @@ -10,6 +10,7 @@ import ( "github.com/markusmobius/go-dateparser" "github.com/playwright-community/playwright-go" "maps" + "strings" ) // Timeouts @@ -96,12 +97,13 @@ func (e *PwExtractor) visitPage(task models.Task, cb func(page playwright.Page) } var pwCookies []playwright.OptionalCookie - for k, v := range cookies { + for _, cook := range cookies { pwCookies = append(pwCookies, playwright.OptionalCookie{ - Name: k, - Value: v, + Name: cook[0], + Value: cook[1], Domain: playwright.String(fmt.Sprintf(".%s", baseDomain)), Path: playwright.String("/"), + Secure: playwright.Bool(strings.HasPrefix(cook[0], "__Secure")), }) } diff --git a/internal/extractors/pwextractor/utils.go b/internal/extractors/pwextractor/utils.go index 6eac213..071b795 100644 --- a/internal/extractors/pwextractor/utils.go +++ b/internal/extractors/pwextractor/utils.go @@ -63,21 +63,20 @@ func parseBaseDomain(urlStr string) (string, error) { return fmt.Sprintf("%s.%s", domainParts[1], domainParts[0]), nil } -func parseCookieString(cookieStr string) (map[string]string, error) { - result := make(map[string]string) - failed := fmt.Errorf("failed to parse cookies") +func parseCookieString(cookieStr string) ([][2]string, error) { + var result [][2]string for _, cook := range strings.Split(cookieStr, ";") { kv := strings.Split(cook, "=") - if len(kv) != 2 { - return nil, failed + if len(kv) < 2 { + return nil, fmt.Errorf("failed to parse cookies: split by =: count<2") } k, err1 := url.QueryUnescape(kv[0]) - v, err2 := url.QueryUnescape(kv[1]) + v, err2 := url.QueryUnescape(strings.Join(kv[1:], "=")) if err1 != nil || err2 != nil { - return nil, failed + return nil, fmt.Errorf("failed to parse cookies: unescape k=%w v=%w", err1, err2) } - result[k] = v + result = append(result, [2]string{strings.TrimSpace(k), strings.TrimSpace(v)}) } return result, nil