parent
9acbd6b93f
commit
e64142f436
|
@ -0,0 +1,28 @@
|
|||
import {runTest as runRule34XXX} from "./test/rule34xxx";
|
||||
|
||||
// Command-line bootstrap.
//   argv[2] - starting URL (required; must parse as an absolute URL)
//   argv[3] - number of pages to scan (optional; defaults to 30)

// Parsed only to validate the first argument; an unparsable or missing
// value aborts the process with exit code 99.
let url: URL;
try {
    url = new URL(process.argv[2]);
} catch (err: unknown) {
    console.error(err);
    process.exit(99);
}

// Page count, overridable through the second argument.
let pageCount = 30;
const rawPageCount = process.argv[3];
if (rawPageCount !== undefined && rawPageCount.trim() !== ``) {
    // Validate and convert with the same function: the previous isNaN() +
    // parseInt() pair disagreed on inputs such as "Infinity" (-> NaN) and
    // "1e3" (-> 1). Blank strings are rejected above because Number("") is 0.
    const parsedPageCount = Number(rawPageCount);
    if (Number.isFinite(parsedPageCount)) {
        // Truncate like parseInt did, so "3.5" still means 3 pages.
        pageCount = Math.trunc(parsedPageCount);
    }
}

// Surface a failed run instead of leaving the promise floating.
main().catch((err: unknown) => {
    console.error(err);
    process.exit(1);
});
|
||||
|
||||
/**
 * Runs the rule34.xxx scrape for the URL given as the first command-line
 * argument, writes the stringified result to stdout and exits with code 0.
 *
 * The third argument passed to `runRule34XXX` asks it to return its result
 * rather than only writing it out.
 */
async function main(): Promise<void> {
    const results = await runRule34XXX(process.argv[2], pageCount, true);

    // process.exit() does not wait for pending stdout writes, and writes to a
    // pipe are asynchronous, so exit only once the write has been flushed.
    // Otherwise large outputs can be truncated for the consumer.
    process.stdout.write(String(results), () => {
        process.exit(0);
    });
}
|
||||
|
|
@ -5,7 +5,7 @@ export function getPageContents(url: string): Promise<axiosPackage.AxiosResponse
|
|||
// Return the axios function's promise
|
||||
return axios.get(url, {
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0',
|
||||
}
|
||||
});
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
import { Post, Tag, LogEntry, LogType } from "../type/generic";
|
||||
import { Scrapper } from "../class/Scrapper";
|
||||
import { getPageContents } from "../helper/requestManager";
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
/**
 * Scrapper implementation for inkbunny.net listing pages.
 */
export class InkBunny extends Scrapper {

    constructor() {
        // Set the domain base of the current Scrapper as "inkbunny.net"
        super("https://inkbunny.net");
    }

    /**
     * Get a list of posts from the mentioned page.
     *
     * Every `.widget_imageFromSubmission a` anchor that carries an `href` of
     * at least 4 characters is turned into a link of the form
     * `<domain>/<href>`.
     *
     * @param url - Page to scan; it must be accepted by `checkURLBase`.
     * @returns The post links found on the page, in document order.
     * @throws Error if `url` is rejected by `checkURLBase`, if the response
     *         status is outside 200-299, or if the request itself fails
     *         (the failure is logged to `this.logs` and rethrown).
     */
    public async getPostsFromPage(url: string): Promise<Array<string>> {

        // Check if the provided link is valid
        if (!this.checkURLBase(url)) {
            throw new Error(`Invalid url provided`);
        }

        // Raw HTML of the page, assigned once the request succeeds
        let pageContents: string;

        // Send out the request to grab the contents of the page
        try {
            if (this.verbose) {
                console.error(`Sniffing page...`);
            }

            const request = await getPageContents(url);

            if (request.status < 200 || request.status > 299) {
                this.logs.push({
                    msg: `Invalid response code[${request.status}]`,
                    type: LogType.ERROR,
                    err: null,
                    data: null,
                    ts: new Date()
                });
                throw new Error(`Invalid response code[${request.status}]`);
            }

            pageContents = (request.data as string);
        } catch (err) {
            // Record the failure, then let the caller deal with it
            this.logs.push({
                msg: `[Error]::getPostsFromPage::`,
                type: LogType.ERROR,
                err: (err as Error),
                data: null,
                ts: new Date()
            });
            throw err;
        }

        // Process the page's posts with cheerio
        const $ = cheerio.load(pageContents);

        // Define the post list
        const postList: Array<string> = [];

        // Go through all of the posts. The arrow callback keeps `this` bound
        // to the scrapper, so no `self` alias is needed.
        $(`.widget_imageFromSubmission a`).each((_index, element) => {
            const href = $(element).attr(`href`);

            // Anchors without an href used to be stringified to "undefined"
            // and pushed as a bogus link; skip them along with too-short ones.
            if (href === undefined || href.length < 4) {
                return;
            }

            // Drop leading slashes so a root-relative href does not produce
            // "<domain>//path".
            postList.push(`${this.domain}/${href.replace(/^\/+/, ``)}`);
        });

        if (this.verbose) {
            console.error(`Found ${postList.length} posts`);
        }

        return postList;
    }

}
|
|
@ -1,6 +1,12 @@
|
|||
import {runTest as runRule34XXX} from "./test/rule34xxx";
|
||||
import {runTest as inkbunnytest} from "./test/inkbunny";
|
||||
|
||||
console.log(`Testing Rule34.xxx`);

// Both runs are started without awaiting each other. Each gets its own
// rejection handler so a failed run is reported instead of surfacing as an
// unhandled promise rejection.

// Running the rule34 test with "Most popular" page
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=-male%2fmale+-furry+-yaoi+-male_focus+-male_only+-anthro+-3d+-cum+-my_little_pony+-vore+-unknown_species+-fur+-animal`, 50)
    .catch((err: unknown) => {
        console.error(`Rule34.xxx test failed: `, err);
    });

// Second run, against a single-tag listing
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=superbusty`, 50)
    .catch((err: unknown) => {
        console.error(`Rule34.xxx test failed: `, err);
    });

// InkBunny test, currently disabled
// inkbunnytest();
|
|
@ -0,0 +1,23 @@
|
|||
// This is the test file for the library, different tests are ran in here.
|
||||
import { InkBunny } from "../module/inkbunny";
|
||||
import {Post} from "../type/generic";
|
||||
import * as fs from "fs/promises";
|
||||
|
||||
|
||||
/**
 * Smoke test for the InkBunny scrapper: fetches the post links found on
 * `startingPage` and prints them.
 *
 * A failed fetch is printed with `console.error` and an empty list is
 * printed afterwards, so this function itself does not reject on it.
 *
 * @param startingPage - Gallery page to read the post links from.
 * @param pages - Currently unused; kept so the signature matches the other
 *                test runners.
 */
export async function runTest(startingPage: string = `https://inkbunny.net/gallery/CyanCapsule/1/7413c06e38`, pages: number = 10): Promise<void> {

    const bunny = new InkBunny();
    bunny.verbose = true;

    let postLinks: Array<string> = [];
    try {
        // Use the caller-supplied page; previously the default URL was
        // hard-coded here and the parameter was silently ignored.
        postLinks = await bunny.getPostsFromPage(startingPage);
    } catch (err) {
        console.error(err);
    }

    console.log(postLinks);

}
|
|
@ -3,10 +3,12 @@ import {Rule34xxx} from "../module/rule34xxx";
|
|||
import {Post} from "../type/generic";
|
||||
import * as fs from "fs/promises";
|
||||
|
||||
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10) {
|
||||
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, pages: number = 10, doReturn: boolean = false) {
|
||||
// Initialize the rule34 module
|
||||
const r34: Rule34xxx = new Rule34xxx();
|
||||
r34.verbose = true;
|
||||
if ( !doReturn ) {
|
||||
r34.verbose = true;
|
||||
}
|
||||
|
||||
// Run the get post Details function
|
||||
let pageList: Array<string>;
|
||||
|
@ -63,18 +65,24 @@ export async function runTest(startingPage: string = `https://rule34.xxx/index.p
|
|||
console.error(`err: `, err);
|
||||
})
|
||||
.finally(() => {
|
||||
console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
|
||||
if ( r34.verbose ) {
|
||||
console.log(`[${i}/${postLinks.length}][${(i/postLinks.length * 100).toFixed(2)}%] Scrapping...`);
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
console.log(`Done!`);
|
||||
|
||||
// Reverse sort them
|
||||
postList = postList.reverse();
|
||||
|
||||
await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
|
||||
await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
|
||||
await fs.writeFile(`./export/r34xxx_postList_example.json`, JSON.stringify(postList, null, 4));
|
||||
if ( doReturn ) {
|
||||
return JSON.stringify(postList, null, 2);
|
||||
} else {
|
||||
console.log(`Done!`);
|
||||
// await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
|
||||
// await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
|
||||
await fs.writeFile(`./export/postList_${new Date().getTime()}.json`, JSON.stringify(postList, null, 4));
|
||||
}
|
||||
|
||||
|
||||
// Display results
|
||||
console.log({
|
||||
|
|
Reference in New Issue