Initial commit
This commit is contained in:
102
internal/scraper/config.go
Normal file
102
internal/scraper/config.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Config groups the settings of the scraper package. DefaultConfig returns
// the stock values. How each field is consumed is decided by code outside
// this file, so the group notes below are inferred from field names and
// default values and should be confirmed against the callers.
type Config struct {
	// Target site: BaseURL is the scheme-qualified origin, AllowedDomain the
	// bare host name.
	BaseURL       string
	AllowedDomain string

	// Numeric limits (presumably crawl depth, retry count and queue size).
	MaxDepth       int
	MaxRetries     int
	MaxQueuedPages int

	// Durations (presumably request pacing, per-request timeout, retry
	// back-off base and progress-report interval).
	RequestDelay   time.Duration
	RequestJitter  time.Duration
	RequestTimeout time.Duration
	RetryBaseDelay time.Duration
	ProgressEvery  time.Duration

	// Browser-like request metadata: a referrer URL and a list of
	// User-Agent strings.
	BrowserReferrer string
	BrowserAgents   []string

	// Absolute seed URLs for item pages and crafting pages.
	ItemSeeds     []string
	CraftingSeeds []string

	// URL paths to skip: by prefix, and by exact path.
	// NOTE(review): IgnoredExact is a map[string]bool, so a key stored with
	// a false value behaves differently under value lookups (m[k]) and
	// presence lookups (_, ok := m[k]); prefer storing only true entries.
	IgnoredPrefixes []string
	IgnoredExact    map[string]bool

	// Precompiled patterns.
	ItemListPathRe *regexp.Regexp
	CraftingPathRe *regexp.Regexp
	AmountPrefixRe *regexp.Regexp
	WhitespaceRe   *regexp.Regexp

	// Glyphs for a progress spinner (presumably shown one frame at a time).
	SpinnerFrames []string
}
|
||||
|
||||
func DefaultConfig() Config {
|
||||
baseURL := "https://outward.fandom.com"
|
||||
|
||||
return Config{
|
||||
BaseURL: baseURL,
|
||||
AllowedDomain: "outward.fandom.com",
|
||||
MaxDepth: 3,
|
||||
MaxRetries: 2,
|
||||
MaxQueuedPages: 1500,
|
||||
RequestDelay: 650 * time.Millisecond,
|
||||
RequestJitter: 350 * time.Millisecond,
|
||||
RequestTimeout: 8 * time.Second,
|
||||
RetryBaseDelay: 1200 * time.Millisecond,
|
||||
ProgressEvery: 3 * time.Second,
|
||||
BrowserReferrer: baseURL + "/",
|
||||
BrowserAgents: []string{
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
|
||||
},
|
||||
ItemSeeds: []string{
|
||||
baseURL + "/wiki/Items/Weapons",
|
||||
baseURL + "/wiki/Items/Equipment",
|
||||
baseURL + "/wiki/Items/Consumables",
|
||||
baseURL + "/wiki/Items/Ingredients",
|
||||
baseURL + "/wiki/Items/Deployables",
|
||||
baseURL + "/wiki/Items/Other",
|
||||
baseURL + "/wiki/Items/Item_Values",
|
||||
},
|
||||
CraftingSeeds: []string{
|
||||
baseURL + "/wiki/Crafting",
|
||||
baseURL + "/wiki/Crafting/Survival",
|
||||
baseURL + "/wiki/Crafting/Cooking",
|
||||
baseURL + "/wiki/Crafting/Alchemy",
|
||||
},
|
||||
IgnoredPrefixes: []string{
|
||||
"/wiki/File:",
|
||||
"/wiki/Category:",
|
||||
"/wiki/Special:",
|
||||
"/wiki/Help:",
|
||||
"/wiki/Template:",
|
||||
"/wiki/User:",
|
||||
"/wiki/User_blog:",
|
||||
"/wiki/Forum:",
|
||||
"/wiki/Message_Wall:",
|
||||
"/wiki/Thread:",
|
||||
"/wiki/Map:",
|
||||
},
|
||||
IgnoredExact: map[string]bool{
|
||||
"/wiki/Outward_Wiki": true,
|
||||
"/wiki/Items": true,
|
||||
"/wiki/Crafting": false,
|
||||
},
|
||||
ItemListPathRe: regexp.MustCompile(`^/wiki/Items(?:/|$)`),
|
||||
CraftingPathRe: regexp.MustCompile(`^/wiki/Crafting(?:/|$)`),
|
||||
AmountPrefixRe: regexp.MustCompile(`^\s*(\d+x)\s+`),
|
||||
WhitespaceRe: regexp.MustCompile(`\s+`),
|
||||
SpinnerFrames: []string{
|
||||
"⠋",
|
||||
"⠙",
|
||||
"⠹",
|
||||
"⠸",
|
||||
"⠼",
|
||||
"⠴",
|
||||
"⠦",
|
||||
"⠧",
|
||||
"⠇",
|
||||
"⠏",
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user