103 lines
2.7 KiB
Go
103 lines
2.7 KiB
Go
|
|
package scraper
|
||
|
|
|
||
|
|
import (
|
||
|
|
"regexp"
|
||
|
|
"time"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Config collects the settings used by the scraper package: target site,
// crawl limits, request pacing, browser-like header values, seed URLs, link
// filters, precompiled patterns and spinner frames. DefaultConfig returns a
// fully populated value.
//
// NOTE(review): the per-group notes below are inferred from the field names
// and from the defaults set in DefaultConfig; confirm them against the code
// that actually reads these fields.
type Config struct {
	// Site identity: the origin URL (scheme + host) and the bare host name.
	BaseURL, AllowedDomain string

	// Crawl bounds; presumably link depth, retries per request and queue size.
	MaxDepth, MaxRetries, MaxQueuedPages int

	// Per-request timing: base delay, jitter and timeout.
	RequestDelay, RequestJitter, RequestTimeout time.Duration

	// Base delay between retries and the progress reporting interval.
	RetryBaseDelay, ProgressEvery time.Duration

	// Browser-like header values; presumably Referer and a User-Agent pool.
	BrowserReferrer string
	BrowserAgents   []string

	// Absolute URLs that seed the item and crafting crawls.
	ItemSeeds, CraftingSeeds []string

	// Site-relative paths to skip, matched by prefix or by exact value.
	IgnoredPrefixes []string
	IgnoredExact    map[string]bool

	// Precompiled patterns for path classification and text clean-up.
	ItemListPathRe, CraftingPathRe, AmountPrefixRe, WhitespaceRe *regexp.Regexp

	// Frames for a textual progress spinner.
	SpinnerFrames []string
}
|
||
|
|
|
||
|
|
func DefaultConfig() Config {
|
||
|
|
baseURL := "https://outward.fandom.com"
|
||
|
|
|
||
|
|
return Config{
|
||
|
|
BaseURL: baseURL,
|
||
|
|
AllowedDomain: "outward.fandom.com",
|
||
|
|
MaxDepth: 3,
|
||
|
|
MaxRetries: 2,
|
||
|
|
MaxQueuedPages: 1500,
|
||
|
|
RequestDelay: 650 * time.Millisecond,
|
||
|
|
RequestJitter: 350 * time.Millisecond,
|
||
|
|
RequestTimeout: 8 * time.Second,
|
||
|
|
RetryBaseDelay: 1200 * time.Millisecond,
|
||
|
|
ProgressEvery: 3 * time.Second,
|
||
|
|
BrowserReferrer: baseURL + "/",
|
||
|
|
BrowserAgents: []string{
|
||
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
|
||
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
|
||
|
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
|
||
|
|
},
|
||
|
|
ItemSeeds: []string{
|
||
|
|
baseURL + "/wiki/Items/Weapons",
|
||
|
|
baseURL + "/wiki/Items/Equipment",
|
||
|
|
baseURL + "/wiki/Items/Consumables",
|
||
|
|
baseURL + "/wiki/Items/Ingredients",
|
||
|
|
baseURL + "/wiki/Items/Deployables",
|
||
|
|
baseURL + "/wiki/Items/Other",
|
||
|
|
baseURL + "/wiki/Items/Item_Values",
|
||
|
|
},
|
||
|
|
CraftingSeeds: []string{
|
||
|
|
baseURL + "/wiki/Crafting",
|
||
|
|
baseURL + "/wiki/Crafting/Survival",
|
||
|
|
baseURL + "/wiki/Crafting/Cooking",
|
||
|
|
baseURL + "/wiki/Crafting/Alchemy",
|
||
|
|
},
|
||
|
|
IgnoredPrefixes: []string{
|
||
|
|
"/wiki/File:",
|
||
|
|
"/wiki/Category:",
|
||
|
|
"/wiki/Special:",
|
||
|
|
"/wiki/Help:",
|
||
|
|
"/wiki/Template:",
|
||
|
|
"/wiki/User:",
|
||
|
|
"/wiki/User_blog:",
|
||
|
|
"/wiki/Forum:",
|
||
|
|
"/wiki/Message_Wall:",
|
||
|
|
"/wiki/Thread:",
|
||
|
|
"/wiki/Map:",
|
||
|
|
},
|
||
|
|
IgnoredExact: map[string]bool{
|
||
|
|
"/wiki/Outward_Wiki": true,
|
||
|
|
"/wiki/Items": true,
|
||
|
|
"/wiki/Crafting": false,
|
||
|
|
},
|
||
|
|
ItemListPathRe: regexp.MustCompile(`^/wiki/Items(?:/|$)`),
|
||
|
|
CraftingPathRe: regexp.MustCompile(`^/wiki/Crafting(?:/|$)`),
|
||
|
|
AmountPrefixRe: regexp.MustCompile(`^\s*(\d+x)\s+`),
|
||
|
|
WhitespaceRe: regexp.MustCompile(`\s+`),
|
||
|
|
SpinnerFrames: []string{
|
||
|
|
"⠋",
|
||
|
|
"⠙",
|
||
|
|
"⠹",
|
||
|
|
"⠸",
|
||
|
|
"⠼",
|
||
|
|
"⠴",
|
||
|
|
"⠦",
|
||
|
|
"⠧",
|
||
|
|
"⠇",
|
||
|
|
"⠏",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
}
|