import { Post, Tag, LogEntry, LogType } from "../type/generic";

/**
 * The base class of the scrapers; every website scraper must extend this class.
 */
export class Scrapper {
  constructor(domain: string) {
    // Set the domain
    this.domain = domain;
  }

  /**
   * An array of all of the logs
   */
  public logs: Array<LogEntry> = [];

  /**
   * The fully qualified domain base, without a trailing "/", of the website to scrape, for example "https://rule34.life"
   */
  public domain: string = ``;

  /**
   * Display console logs
   */
  public verbose: boolean = false;

  // #region Protected Functions

  /**
   * Check that a URL belongs to this scraper's domain.
   * @param url The URL to validate
   * @returns true when the URL's origin matches the configured domain, false otherwise
   */
  protected checkURLBase(url: string): boolean {
    try {
      // Try to build a new URL instance; this throws on malformed input
      const instance: URL = new URL(url);

      // Check if the origin matches ours
      if (instance.origin === this.domain) {
        // Return success
        return true;
      } else {
        this.logs.push({
          type: LogType.ERROR,
          msg: `Invalid URL provided`,
          data: {
            url,
            domain: this.domain,
            origin: instance.origin,
          },
          err: null,
          ts: new Date(),
        });
      }
    } catch (err) {
      this.logs.push({
        type: LogType.ERROR,
        msg: `Failed to parse provided URL`,
        data: null,
        err: err as Error,
        ts: new Date(),
      });
    }

    // Return a failure
    return false;
  }

  // #endregion

  // #region Public Functions

  /**
   * Get the details of a specific post
   * @param url The URL of the post; this must be the actual page which contains the image, tags, etc.
   * @returns The post details, or null if the post could not be scraped
   */
  public async getPostDetails(url: string): Promise<Post | null> {
    return null;
  }

  /**
   * Get a list of posts from the given page
   * @param url The URL of the page to scrape
   * @returns The posts found on the page
   */
  public async getPostsFromPage(url: string): Promise<Array<Post>> {
    return [];
  }

  /**
   * Get a list of posts by crawling, starting from a specific page.
   * @param url The starting page; crawling continues for as many pages as requested
   * @param pageCount The number of pages to crawl
   * @returns The posts collected across the crawled pages
   */
  public async crawlPages(url: string, pageCount: number = 10): Promise<Array<Post>> {
    return [];
  }

  // #endregion
}
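
// Example usage — a minimal sketch, not part of the base class. The class
// name, the domain, and the empty parsing logic below are assumptions; a
// real subclass would fetch the page (e.g. with fetch plus an HTML parser)
// and map its markup to Post objects as defined in ../type/generic.
export class ExampleScrapper extends Scrapper {
  constructor() {
    // The base class stores this as the expected URL origin
    super("https://example.com");
  }

  public async getPostsFromPage(url: string): Promise<Array<Post>> {
    // Reject URLs that do not belong to this scraper's domain;
    // checkURLBase pushes a LogEntry explaining the failure
    if (!this.checkURLBase(url)) {
      return [];
    }

    // A real implementation would fetch and parse the page here,
    // returning one Post per post element found in the markup
    return [];
  }
}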