parent
							
								
									9acbd6b93f
								
							
						
					
					
						commit
						e64142f436
					
				| 
						 | 
				
			
			@ -0,0 +1,28 @@
 | 
			
		|||
import {runTest as runRule34XXX} from "./test/rule34xxx";
 | 
			
		||||
 | 
			
		||||
// Validate the target URL (first CLI argument) up front so a malformed or
// missing value fails fast with a dedicated exit code instead of surfacing
// later as a request error.
let url: URL;

try {
    url = new URL(process.argv[2]);
} catch ( err ) {
    console.error(err);
    process.exit(99);
}

// Number of pages to process; can be overridden by the optional second CLI argument.
let pageCount = 30;
if ( process.argv.length >= 4 ) {
    const rawPageCount = process.argv[3];
    const parsedPageCount = Number.parseInt(rawPageCount);

    // Number("") and Number(" ") are 0, so the numeric-string check alone lets
    // blank arguments through, for which parseInt returns NaN. Also require the
    // parsed value itself to be a number so the default is kept in that case.
    if ( !isNaN(Number(rawPageCount)) && !Number.isNaN(parsedPageCount) ) {
        pageCount = parsedPageCount;
    }
}
 | 
			
		||||
 | 
			
		||||
// Surface any failure explicitly instead of leaving a floating promise whose
// rejection handling depends on the Node.js version.
main().catch((err: unknown) => {
    console.error(err);
    process.exit(1);
});

/**
 * Runs the rule34.xxx scrape for the URL given as the first CLI argument,
 * writes the stringified result to stdout and terminates the process.
 */
async function main(): Promise<void> {
    const results = await runRule34XXX(process.argv[2], pageCount, true);

    // Writes to stdout can be asynchronous (e.g. when piped). Exiting only from
    // the write callback guarantees the output has been flushed first; calling
    // process.exit() right after write() can truncate it.
    process.stdout.write(String(results), () => {
        process.exit(0);
    });
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -5,7 +5,7 @@ export function getPageContents(url: string): Promise<axiosPackage.AxiosResponse
 | 
			
		|||
    // Return the axios function's promise
 | 
			
		||||
    return axios.get(url, {
 | 
			
		||||
        headers: { 
 | 
			
		||||
            'User-Agent': 'Mozilla/5.0',
 | 
			
		||||
            'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0',
 | 
			
		||||
        }
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,83 @@
 | 
			
		|||
import { Post, Tag, LogEntry, LogType } from "../type/generic";
 | 
			
		||||
import { Scrapper } from "../class/Scrapper";
 | 
			
		||||
import { getPageContents } from "../helper/requestManager";
 | 
			
		||||
import * as cheerio from 'cheerio';
 | 
			
		||||
 | 
			
		||||
/**
 * Scrapper implementation for inkbunny.net.
 */
export class InkBunny extends Scrapper {

    constructor() {
        // Set the domain base of the current Scrapper as "inkbunny.net"
        super("https://inkbunny.net");
    }

    /**
     * Get a list of post links from the mentioned page.
     *
     * @param url Address of the listing page; must pass `checkURLBase`.
     * @returns The links found in `.widget_imageFromSubmission a` anchors, each
     *          prefixed with this scrapper's domain.
     * @throws Error when the url is rejected by `checkURLBase`, when the request
     *         fails, or when the response status is outside the 2xx range.
     */
    public async getPostsFromPage(url: string): Promise<Array<string>> {

        // Check if the provided link is valid
        if (!this.checkURLBase(url)) {
            throw new Error(`Invalid url provided`);
        }

        // Holds the raw HTML of the page once it has been fetched
        let pageContents: string;

        // Send out the request to grab the contents of the page
        try {
            if (this.verbose) {
                console.error(`Sniffing page...`);
            }

            const response = await getPageContents(url);

            if (response.status < 200 || response.status > 299) {
                this.logs.push({
                    msg: `Invalid response code[${response.status}]`,
                    type: LogType.ERROR,
                    err: null,
                    data: null,
                    ts: new Date()
                });
                throw new Error(`Invalid response code[${response.status}]`);
            }

            pageContents = (response.data as string);
        } catch (err) {
            // Record the failure in the logs, then let the caller deal with it
            this.logs.push({
                msg: `[Error]::getPostsFromPage::`,
                type: LogType.ERROR,
                err: (err as Error),
                data: null,
                ts: new Date()
            });
            throw err;
        }

        // Process the page's posts with cheerio
        const $ = cheerio.load(pageContents);

        // Define the post List
        const postList: Array<string> = [];

        // Go through all of the posts. An arrow callback keeps `this` bound to
        // the scrapper, so the matched element is taken from the second argument.
        $(`.widget_imageFromSubmission a`).each((_index, element) => {
            const href = $(element).attr(`href`);

            // Skip anchors without an href: interpolating `undefined` would
            // otherwise produce a bogus "<domain>/undefined" link.
            if (href !== undefined && href.length >= 4) {
                // NOTE(review): an href that already starts with "/" yields a
                // double slash here — confirm against the actual page markup.
                postList.push(`${this.domain}/${href}`);
            }
        });

        if (this.verbose) {
            console.error(`Found ${postList.length} posts`);
        }

        return postList;
    }

}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,6 +1,12 @@
 | 
			
		|||
import {runTest as runRule34XXX} from "./test/rule34xxx";
 | 
			
		||||
import {runTest as inkbunnytest} from "./test/inkbunny";
 | 
			
		||||
 | 
			
		||||
console.log(`Testing Rule34.xxx`);
 | 
			
		||||
 | 
			
		||||
// Running the rule34 test with "Most popular" page
 | 
			
		||||
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50);
 | 
			
		||||
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50);
 | 
			
		||||
 | 
			
		||||
// !
 | 
			
		||||
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=superbusty`, 50);
 | 
			
		||||
 | 
			
		||||
// inkbunnytest();
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,23 @@
 | 
			
		|||
// This is the test file for the library; different tests are run from here.
 | 
			
		||||
import { InkBunny } from "../module/inkbunny";
 | 
			
		||||
import {Post} from "../type/generic";
 | 
			
		||||
import * as fs from "fs/promises";
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 * Smoke test for the InkBunny scrapper: fetches the post links from a single
 * gallery page and prints them.
 *
 * @param startingPage Gallery page to read the post links from.
 * @param pages Currently unused by this test; kept for signature parity with callers.
 */
export async function runTest(startingPage: string = `https://inkbunny.net/gallery/CyanCapsule/1/7413c06e38`, pages: number = 10): Promise<void> {

    const bunny = new InkBunny();
    bunny.verbose = true;

    let postLinks: Array<string> = [];
    try {
        // Honour the caller-supplied page rather than a hard-coded URL
        postLinks = await bunny.getPostsFromPage(startingPage);
    } catch (err) {
        // A failed fetch is reported and the (empty) list is still printed below
        console.error(err);
    }

    console.log(postLinks);

}
 | 
			
		||||
| 
						 | 
				
			
			@ -3,10 +3,12 @@ import {Rule34xxx} from "../module/rule34xxx";
 | 
			
		|||
import {Post} from "../type/generic";
 | 
			
		||||
import * as fs from "fs/promises";
 | 
			
		||||
 | 
			
		||||
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10) {
 | 
			
		||||
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10, doReturn: boolean = false) {
 | 
			
		||||
    // Initialize the rule34 module
 | 
			
		||||
    const r34: Rule34xxx = new Rule34xxx();
 | 
			
		||||
    r34.verbose = true;
 | 
			
		||||
    if ( !doReturn ) {
 | 
			
		||||
        r34.verbose = true;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // Run the get post Details function
 | 
			
		||||
    let pageList: Array<string>;
 | 
			
		||||
| 
						 | 
				
			
			@ -63,18 +65,24 @@ export async function runTest(startingPage: string = `https://rule34.xxx/index.p
 | 
			
		|||
            console.error(`err: `, err);
 | 
			
		||||
        })
 | 
			
		||||
        .finally(() => {
 | 
			
		||||
            console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
 | 
			
		||||
            if ( r34.verbose ) {
 | 
			
		||||
                console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
 | 
			
		||||
            }
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`Done!`);
 | 
			
		||||
 | 
			
		||||
    // Reverse sort them
 | 
			
		||||
    postList = postList.reverse();
 | 
			
		||||
 | 
			
		||||
    await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
 | 
			
		||||
    await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
 | 
			
		||||
    await fs.writeFile(`./export/r34xxx_postList_example.json`, JSON.stringify(postList, null, 4));
 | 
			
		||||
    if ( doReturn ) {
 | 
			
		||||
        return JSON.stringify(postList, null, 2);
 | 
			
		||||
    } else {
 | 
			
		||||
        console.log(`Done!`);
 | 
			
		||||
        // await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
 | 
			
		||||
        // await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
 | 
			
		||||
        await fs.writeFile(`./export/postList_${new Date().getTime()}.json`, JSON.stringify(postList, null, 4));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Display results
 | 
			
		||||
    console.log({
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Reference in New Issue