parent
9acbd6b93f
commit
e64142f436
|
@ -0,0 +1,28 @@
|
||||||
|
import {runTest as runRule34XXX} from "./test/rule34xxx";
|
||||||
|
|
||||||
|
/**
 * Parse the optional page-count command line argument.
 *
 * The argument is accepted only when it converts to a finite number; the
 * fractional part is discarded. Anything else (missing argument, empty or
 * whitespace-only string, non-numeric text, `Infinity`) yields `fallback`,
 * so the page count can never end up as `NaN`.
 *
 * @param raw The raw argument text, or `undefined` when it was not supplied.
 * @param fallback Value returned when `raw` is not a usable number.
 * @returns The parsed page count, or `fallback`.
 */
function parsePageCount(raw: string | undefined, fallback: number): number {
    if (raw === undefined) {
        return fallback;
    }

    const trimmed = raw.trim();
    if (trimmed === '') {
        // Number('') is 0, which would wrongly pass a plain isNaN() check.
        return fallback;
    }

    const numeric = Number(trimmed);
    return Number.isFinite(numeric) ? Math.trunc(numeric) : fallback;
}

// Validate the first CLI argument as a URL before doing any work.
let url: URL;

try {
    url = new URL(process.argv[2]);
} catch ( err ) {
    // Missing or malformed URL: report it and stop with a dedicated exit code.
    console.error(err);
    process.exit(99);
}

// Second CLI argument (optional): number of pages to process, 30 by default.
let pageCount: number = parsePageCount(
    process.argv.length >= 4 ? process.argv[3] : undefined,
    30
);

// Run the scraper; report a failure explicitly instead of leaving the
// promise rejection unhandled.
main().catch((err: unknown) => {
    console.error(err);
    process.exit(1);
});
|
||||||
|
|
||||||
|
/**
 * Run the rule34.xxx test routine for the URL given on the command line and
 * print whatever it returns to stdout.
 *
 * The process exits with code 0 only after stdout has accepted the whole
 * payload: calling `process.exit()` straight after `write()` can cut the
 * output short when stdout is written asynchronously (for example a pipe).
 *
 * @returns A promise that rejects if the test routine or the write fails.
 */
async function main(): Promise<void> {
    const results = await runRule34XXX(process.argv[2], pageCount, true);

    // Wait for the write callback so the output is flushed before exiting.
    await new Promise<void>((resolve, reject) => {
        process.stdout.write(String(results), (err?: Error | null) => {
            if (err) {
                reject(err);
            } else {
                resolve();
            }
        });
    });

    process.exit(0);
}
|
||||||
|
|
|
@ -5,7 +5,7 @@ export function getPageContents(url: string): Promise<axiosPackage.AxiosResponse
|
||||||
// Return the axios function's promise
|
// Return the axios function's promise
|
||||||
return axios.get(url, {
|
return axios.get(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': 'Mozilla/5.0',
|
'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0',
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
|
@ -0,0 +1,83 @@
|
||||||
|
import { Post, Tag, LogEntry, LogType } from "../type/generic";
|
||||||
|
import { Scrapper } from "../class/Scrapper";
|
||||||
|
import { getPageContents } from "../helper/requestManager";
|
||||||
|
import * as cheerio from 'cheerio';
|
||||||
|
|
||||||
|
/**
 * Scrapper implementation for inkbunny.net pages.
 *
 * NOTE(review): `verbose`, `logs`, `domain` and `checkURLBase` are used here
 * but declared on the `Scrapper` base class, which is not visible in this
 * file — their exact types should be confirmed there.
 */
export class InkBunny extends Scrapper {

    constructor() {
        // Set the domain base of the current Scrapper as "inkbunny.net"
        super("https://inkbunny.net");
    }

    /**
     * Get a list of posts from the mentioned page
     *
     * Downloads the page, then collects the `href` of every anchor inside a
     * `.widget_imageFromSubmission` element and prefixes it with the
     * scrapper's domain.
     *
     * @param url Address of the page to scan; must pass `checkURLBase`.
     * @returns The post links found on the page (possibly empty).
     * @throws Error when the url is rejected by `checkURLBase`, when the
     *         response status is outside 200-299, or when the request
     *         itself fails (the original error is re-thrown after logging).
     */
    public async getPostsFromPage(url: string): Promise<Array<string>> {

        // Check if the provided link is valid
        if (!this.checkURLBase(url)) {
            throw new Error(`Invalid url provided`);
        }

        // Raw HTML of the page, filled in by the request below
        let pageContents: string;

        // Send out the request to grab the contents of the page
        try {
            if (this.verbose) {
                console.error(`Sniffing page...`);
            }

            const response = await getPageContents(url);

            if (response.status < 200 || response.status > 299) {
                this.logs.push({
                    msg: `Invalid response code[${response.status}]`,
                    type: LogType.ERROR,
                    err: null,
                    data: null,
                    ts: new Date()
                });
                throw new Error(`Invalid response code[${response.status}]`);
            }

            pageContents = (response.data as string);
        } catch (err) {
            // Record the failure, then let the caller deal with it
            this.logs.push({
                msg: `[Error]::getPostsFromPage::`,
                type: LogType.ERROR,
                err: (err as Error),
                data: null,
                ts: new Date()
            });
            throw err;
        }

        // Process the page's posts with cheerio
        const $ = cheerio.load(pageContents);

        // Define the post List
        const postList: Array<string> = [];

        // Go through all of the posts; the arrow callback keeps `this` bound
        // to the scrapper, so no `self` alias is needed.
        $(`.widget_imageFromSubmission a`).each((_index, anchor) => {
            const href = $(anchor).attr(`href`);

            // Skip anchors without an href: interpolating `undefined` would
            // otherwise produce a bogus "<domain>/undefined" link.
            if (typeof href !== 'string' || href.length < 4) {
                return;
            }

            // Drop leading slashes so root-relative links do not end up with
            // a doubled "//" after the domain.
            postList.push(`${this.domain}/${href.replace(/^\/+/, '')}`);
        });

        if (this.verbose) {
            console.error(`Found ${postList.length} posts`);
        }

        return postList;
    }

}
|
|
@ -1,6 +1,12 @@
|
||||||
import {runTest as runRule34XXX} from "./test/rule34xxx";
|
import {runTest as runRule34XXX} from "./test/rule34xxx";
|
||||||
|
import {runTest as inkbunnytest} from "./test/inkbunny";
|
||||||
|
|
||||||
console.log(`Testing Rule34.xxx`);
|
console.log(`Testing Rule34.xxx`);
|
||||||
|
|
||||||
// Running the rule34 test with "Most popular" page
|
// Running the rule34 test with "Most popular" page
|
||||||
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50);
|
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50);
|
||||||
|
|
||||||
|
// !
|
||||||
|
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=superbusty`, 50);
|
||||||
|
|
||||||
|
// inkbunnytest();
|
|
@ -0,0 +1,23 @@
|
||||||
|
// This is the test file for the library, different tests are ran in here.
|
||||||
|
import { InkBunny } from "../module/inkbunny";
|
||||||
|
import {Post} from "../type/generic";
|
||||||
|
import * as fs from "fs/promises";
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Smoke test for the InkBunny scrapper: fetch the post links of one page and
 * print them.
 *
 * A failure while fetching is printed to stderr and an empty list is printed
 * to stdout; the function itself does not reject in that case.
 *
 * @param startingPage Page to collect post links from.
 * @param pages Currently unused by this test; kept for signature parity with
 *              the other test runners.
 */
export async function runTest(startingPage: string = `https://inkbunny.net/gallery/CyanCapsule/1/7413c06e38`, pages: number = 10): Promise<void> {

    const bunny = new InkBunny();
    bunny.verbose = true;

    let postLinks: Array<string> = [];

    try {
        // Use the caller-supplied page rather than a hard-coded address.
        postLinks = await bunny.getPostsFromPage(startingPage);
    } catch (err) {
        console.error(err);
    }

    console.log(postLinks);

}
|
|
@ -3,10 +3,12 @@ import {Rule34xxx} from "../module/rule34xxx";
|
||||||
import {Post} from "../type/generic";
|
import {Post} from "../type/generic";
|
||||||
import * as fs from "fs/promises";
|
import * as fs from "fs/promises";
|
||||||
|
|
||||||
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10) {
|
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10, doReturn: boolean = false) {
|
||||||
// Initialize the rule34 module
|
// Initialize the rule34 module
|
||||||
const r34: Rule34xxx = new Rule34xxx();
|
const r34: Rule34xxx = new Rule34xxx();
|
||||||
r34.verbose = true;
|
if ( !doReturn ) {
|
||||||
|
r34.verbose = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Run the get post Details function
|
// Run the get post Details function
|
||||||
let pageList: Array<string>;
|
let pageList: Array<string>;
|
||||||
|
@ -63,18 +65,24 @@ export async function runTest(startingPage: string = `https://rule34.xxx/index.p
|
||||||
console.error(`err: `, err);
|
console.error(`err: `, err);
|
||||||
})
|
})
|
||||||
.finally(() => {
|
.finally(() => {
|
||||||
console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
|
if ( r34.verbose ) {
|
||||||
|
console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Done!`);
|
|
||||||
|
|
||||||
// Reverse sort them
|
// Reverse sort them
|
||||||
postList = postList.reverse();
|
postList = postList.reverse();
|
||||||
|
|
||||||
await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
|
if ( doReturn ) {
|
||||||
await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
|
return JSON.stringify(postList, null, 2);
|
||||||
await fs.writeFile(`./export/r34xxx_postList_example.json`, JSON.stringify(postList, null, 4));
|
} else {
|
||||||
|
console.log(`Done!`);
|
||||||
|
// await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
|
||||||
|
// await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
|
||||||
|
await fs.writeFile(`./export/postList_${new Date().getTime()}.json`, JSON.stringify(postList, null, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Display results
|
// Display results
|
||||||
console.log({
|
console.log({
|
||||||
|
|
Reference in New Issue