Files
outward-scrapper/internal/scraper/config.go

103 lines
2.7 KiB
Go
Raw Normal View History

2026-03-15 16:42:43 +02:00
package scraper
import (
"regexp"
"time"
)
// Config groups the tunable settings of the scraper package.
//
// How each field is interpreted is decided by the code that consumes Config,
// which is not part of this declaration. The group notes below are inferred
// from the field names and types only and should be confirmed against those
// consumers.
type Config struct {
	// Site being scraped: its base URL and the host name allowed for it.
	BaseURL       string
	AllowedDomain string

	// Numeric limits (presumably crawl depth, retry count, and queue size).
	MaxDepth       int
	MaxRetries     int
	MaxQueuedPages int

	// Durations governing request pacing, timeouts, retry back-off, and
	// progress reporting.
	RequestDelay   time.Duration
	RequestJitter  time.Duration
	RequestTimeout time.Duration
	RetryBaseDelay time.Duration
	ProgressEvery  time.Duration

	// Values describing the browser identity presented to the site
	// (presumably sent as Referer and User-Agent headers; confirm in the
	// request code).
	BrowserReferrer string
	BrowserAgents   []string

	// Seed URL lists for item pages and crafting pages.
	ItemSeeds     []string
	CraftingSeeds []string

	// Paths to leave out: by prefix, and by exact match. Note that
	// IgnoredExact is a map[string]bool, so a key may be present with the
	// value false.
	IgnoredPrefixes []string
	IgnoredExact    map[string]bool

	// Regular expressions for classifying paths and cleaning up text.
	ItemListPathRe *regexp.Regexp
	CraftingPathRe *regexp.Regexp
	AmountPrefixRe *regexp.Regexp
	WhitespaceRe   *regexp.Regexp

	// Frames of the progress spinner animation.
	SpinnerFrames []string
}
// DefaultConfig builds the scraper's built-in configuration, which targets the
// Outward wiki at https://outward.fandom.com.
//
// Each call constructs new slices, a new map, and newly compiled regular
// expressions, so the returned value shares no mutable state with the result
// of any other call.
func DefaultConfig() Config {
	const origin = "https://outward.fandom.com"

	// absolute turns site-relative paths into full URLs under origin.
	absolute := func(paths ...string) []string {
		urls := make([]string, len(paths))
		for i, p := range paths {
			urls[i] = origin + p
		}
		return urls
	}

	var cfg Config

	// Target site.
	cfg.BaseURL = origin
	cfg.AllowedDomain = "outward.fandom.com"

	// Limits.
	cfg.MaxDepth = 3
	cfg.MaxRetries = 2
	cfg.MaxQueuedPages = 1500

	// Timing.
	cfg.RequestDelay = 650 * time.Millisecond
	cfg.RequestJitter = 350 * time.Millisecond
	cfg.RequestTimeout = 8 * time.Second
	cfg.RetryBaseDelay = 1200 * time.Millisecond
	cfg.ProgressEvery = 3 * time.Second

	// Browser identity: the site root as referrer, plus Chrome 134
	// user-agent strings for Windows, macOS, and Linux.
	cfg.BrowserReferrer = origin + "/"
	cfg.BrowserAgents = []string{
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
		"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
	}

	// Seed pages.
	cfg.ItemSeeds = absolute(
		"/wiki/Items/Weapons",
		"/wiki/Items/Equipment",
		"/wiki/Items/Consumables",
		"/wiki/Items/Ingredients",
		"/wiki/Items/Deployables",
		"/wiki/Items/Other",
		"/wiki/Items/Item_Values",
	)
	cfg.CraftingSeeds = absolute(
		"/wiki/Crafting",
		"/wiki/Crafting/Survival",
		"/wiki/Crafting/Cooking",
		"/wiki/Crafting/Alchemy",
	)

	// Wiki namespaces and individual pages to leave out.
	cfg.IgnoredPrefixes = []string{
		"/wiki/File:",
		"/wiki/Category:",
		"/wiki/Special:",
		"/wiki/Help:",
		"/wiki/Template:",
		"/wiki/User:",
		"/wiki/User_blog:",
		"/wiki/Forum:",
		"/wiki/Message_Wall:",
		"/wiki/Thread:",
		"/wiki/Map:",
	}
	cfg.IgnoredExact = map[string]bool{
		"/wiki/Outward_Wiki": true,
		"/wiki/Items":        true,
		// This key is present with the value false. A plain index lookup
		// reads the same as a missing key, but a comma-ok lookup would see
		// the entry. NOTE(review): confirm how consumers query this map
		// before removing it; "/wiki/Crafting" is also a crafting seed.
		"/wiki/Crafting": false,
	}

	// ItemListPathRe / CraftingPathRe match a path that is exactly
	// "/wiki/Items" (resp. "/wiki/Crafting") or continues with "/".
	cfg.ItemListPathRe = regexp.MustCompile(`^/wiki/Items(?:/|$)`)
	cfg.CraftingPathRe = regexp.MustCompile(`^/wiki/Crafting(?:/|$)`)
	// AmountPrefixRe captures a leading "<digits>x" token (e.g. "3x") that is
	// followed by whitespace; WhitespaceRe matches any run of whitespace.
	cfg.AmountPrefixRe = regexp.MustCompile(`^\s*(\d+x)\s+`)
	cfg.WhitespaceRe = regexp.MustCompile(`\s+`)

	// Braille-pattern frames for the progress spinner.
	cfg.SpinnerFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}

	return cfg
}