+ Work on InkBunny

+ CLI
This commit is contained in:
Daniel Legt 2021-11-02 23:05:49 +02:00
parent 9acbd6b93f
commit e64142f436
6 changed files with 158 additions and 10 deletions

28
src/cli.ts Normal file
View File

@ -0,0 +1,28 @@
import {runTest as runRule34XXX} from "./test/rule34xxx";
// Validate the target URL up front. `new URL` throws on a missing or malformed
// argument; in that case the error is printed and the process exits with code 99.
// The URL object is constructed only for validation; the raw argument is what
// gets handed to the scraper.
try {
    new URL(process.argv[2]);
} catch (err) {
    console.error(err);
    process.exit(99);
}

// Number of pages to scrape. Defaults to 30 unless the optional second CLI
// argument is a usable number.
let pageCount: number = 30;
if (process.argv.length >= 4) {
    const rawPageCount = process.argv[3];
    const parsedPageCount = Number(rawPageCount);
    // Blank input converts to 0 and "Infinity" is not NaN, so both are rejected
    // explicitly; otherwise they would slip through a plain isNaN check and
    // leave pageCount as NaN. Fractions are truncated toward zero. The sign and
    // range of the value are not validated here.
    if (rawPageCount.trim() !== "" && Number.isFinite(parsedPageCount)) {
        pageCount = Math.trunc(parsedPageCount);
    }
}

// Surface scraper failures instead of leaving an unhandled promise rejection.
main().catch((err: unknown) => {
    console.error(err);
    process.exit(1);
});
/**
 * CLI entry point: runs the rule34xxx scraper against the URL given as the
 * first CLI argument for `pageCount` pages, asking it to return its result
 * instead of writing files, and prints that result to stdout.
 *
 * The process exits with code 0 only after the write callback fires, because
 * calling `process.exit()` right after `process.stdout.write()` can drop
 * output when stdout is asynchronous.
 */
async function main(): Promise<void> {
    const results = await runRule34XXX(process.argv[2], pageCount, true);
    process.stdout.write(String(results), () => {
        process.exit(0);
    });
}

View File

@ -5,7 +5,7 @@ export function getPageContents(url: string): Promise<axiosPackage.AxiosResponse
// Return the axios function's promise // Return the axios function's promise
return axios.get(url, { return axios.get(url, {
headers: { headers: {
'User-Agent': 'Mozilla/5.0', 'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0',
} }
}); });
} }

83
src/module/inkbunny.ts Normal file
View File

@ -0,0 +1,83 @@
import { Post, Tag, LogEntry, LogType } from "../type/generic";
import { Scrapper } from "../class/Scrapper";
import { getPageContents } from "../helper/requestManager";
import * as cheerio from 'cheerio';
/**
 * Scrapper implementation for inkbunny.net.
 */
export class InkBunny extends Scrapper {

    constructor() {
        // Set the domain base of the current Scrapper as "inkbunny.net"
        super("https://inkbunny.net");
    }

    /**
     * Get a list of posts from the mentioned page.
     *
     * @param url Page to scan; it must pass `checkURLBase`.
     * @returns One link per anchor matched by `.widget_imageFromSubmission a`
     *          whose `href` is present and at least 4 characters long, built
     *          as `${domain}/${href}`.
     * @throws Error when the URL is rejected, when the request fails, or when
     *         the response status is outside 200-299. Request and status
     *         failures are also appended to `this.logs` before being rethrown.
     */
    public async getPostsFromPage(url: string): Promise<Array<string>> {

        // Check if the provided link is valid
        if (!this.checkURLBase(url)) {
            throw new Error(`Invalid url provided`);
        }

        // Raw HTML of the page, assigned once the request succeeds
        let pageContents: string;

        try {
            if (this.verbose) {
                console.error(`Sniffing page...`);
            }

            // Send out the request to fetch the data from the page
            const response = await getPageContents(url);

            if (response.status < 200 || response.status > 299) {
                this.logs.push({
                    msg: `Invalid response code[${response.status}]`,
                    type: LogType.ERROR,
                    err: null,
                    data: null,
                    ts: new Date()
                });
                throw new Error(`Invalid response code[${response.status}]`);
            }

            pageContents = (response.data as string);

        } catch (err) {
            // Record the failure (including the status error thrown above),
            // then let the caller deal with it
            this.logs.push({
                msg: `[Error]::getPostsFromPage::`,
                type: LogType.ERROR,
                err: (err as Error),
                data: null,
                ts: new Date()
            });
            throw err;
        }

        // Process the page's posts with cheerio
        const $ = cheerio.load(pageContents);

        // Define the post List
        const postList: Array<string> = [];

        // Go through all of the posts
        $(`.widget_imageFromSubmission a`).each((_index, anchor) => {
            const href = $(anchor).attr(`href`);

            // Skip anchors without an href: interpolating `undefined` would
            // otherwise yield the 9-character string "undefined" and push a
            // bogus link
            if (href === undefined || href.length < 4) {
                return;
            }

            // Drop leading slashes so the result does not contain a double
            // slash right after the domain
            postList.push(`${this.domain}/${href.replace(/^\/+/, ``)}`);
        });

        if (this.verbose) {
            console.error(`Found ${postList.length} posts`);
        }

        return postList;
    }

}

View File

@ -1,6 +1,12 @@
import {runTest as runRule34XXX} from "./test/rule34xxx"; import {runTest as runRule34XXX} from "./test/rule34xxx";
import {runTest as inkbunnytest} from "./test/inkbunny";
console.log(`Testing Rule34.xxx`); console.log(`Testing Rule34.xxx`);
// Running the rule34 test with "Most popular" page // Running the rule34 test with "Most popular" page
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50); runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50);
// !
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=superbusty`, 50);
// inkbunnytest();

23
src/test/inkbunny.ts Normal file
View File

@ -0,0 +1,23 @@
// This is the test file for the library, different tests are ran in here.
import { InkBunny } from "../module/inkbunny";
import {Post} from "../type/generic";
import * as fs from "fs/promises";
/**
 * Manual smoke test for the InkBunny module: fetches the post links found on
 * `startingPage` and prints them with `console.log`. A failure is printed
 * with `console.error` and an empty list is logged instead.
 *
 * @param startingPage Gallery page to scan for post links.
 * @param pages Not used by this test yet.
 */
export async function runTest(startingPage: string = `https://inkbunny.net/gallery/CyanCapsule/1/7413c06e38`, pages: number = 10): Promise<void> {

    const bunny = new InkBunny();
    bunny.verbose = true;

    let postLinks: Array<string> = [];

    try {
        // Scan the page that was actually requested, not a hard-coded URL
        postLinks = await bunny.getPostsFromPage(startingPage);
    } catch (err) {
        console.error(err);
    }

    console.log(postLinks);
}

View File

@ -3,10 +3,12 @@ import {Rule34xxx} from "../module/rule34xxx";
import {Post} from "../type/generic"; import {Post} from "../type/generic";
import * as fs from "fs/promises"; import * as fs from "fs/promises";
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10) { export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10, doReturn: boolean = false) {
// Initialize the rule34 module // Initialize the rule34 module
const r34: Rule34xxx = new Rule34xxx(); const r34: Rule34xxx = new Rule34xxx();
if ( !doReturn ) {
r34.verbose = true; r34.verbose = true;
}
// Run the get post Details function // Run the get post Details function
let pageList: Array<string>; let pageList: Array<string>;
@ -63,18 +65,24 @@ export async function runTest(startingPage: string = `https://rule34.xxx/index.p
console.error(`err: `, err); console.error(`err: `, err);
}) })
.finally(() => { .finally(() => {
if ( r34.verbose ) {
console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`); console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
}
}) })
} }
console.log(`Done!`);
// Reverse sort them // Reverse sort them
postList = postList.reverse(); postList = postList.reverse();
await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4)); if ( doReturn ) {
await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4)); return JSON.stringify(postList, null, 2);
await fs.writeFile(`./export/r34xxx_postList_example.json`, JSON.stringify(postList, null, 4)); } else {
console.log(`Done!`);
// await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
// await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
await fs.writeFile(`./export/postList_${new Date().getTime()}.json`, JSON.stringify(postList, null, 4));
}
// Display results // Display results
console.log({ console.log({