* Started organizing tests
This commit is contained in:
parent
50660517f5
commit
fd075d060a
83
src/test.ts
83
src/test.ts
|
@ -1,81 +1,6 @@
|
||||||
// This is the test file for the library; the different tests are run from here.
|
import {runTest as runRule34XXX} from "./test/rule34xxx";
|
||||||
import {Rule34xxx} from "./module/rule34xxx";
|
|
||||||
import {Post} from "./type/generic";
|
|
||||||
import * as fs from "fs/promises";
|
|
||||||
|
|
||||||
/*
 * Rule34.xxx scraping test.
 *
 * Steps:
 *  1. Crawl listing pages starting from a fixed "sort by score" URL.
 *  2. Collect the post links found on every crawled page.
 *  3. Fetch the details of every post, `concurrency` posts at a time.
 *  4. Write the three intermediate lists to JSON files under ./export
 *     and log how many entries each list contains.
 */
( async () => {
    console.log(`Testing Rule34.xxx`);

    // Initialize the rule34 module
    const r34: Rule34xxx = new Rule34xxx();
    r34.verbose = true;

    // Crawl the listing pages. Start from an empty list so a failed crawl
    // leaves the rest of the run working on "no pages" instead of throwing
    // on an unassigned variable.
    // NOTE(review): the second argument (20) is presumably a page limit — confirm against Rule34xxx.crawlPages.
    let pageList: Array<string> = [];
    try {
        pageList = await r34.crawlPages(`https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`, 20);
    } catch (err: unknown) {
        console.error(err);
    }

    // Now grab all posts on all of those pages
    const postLinks: Array<string> = [];
    for (const page of pageList) {
        try {
            const posts = await r34.getPostsFromPage(page);
            postLinks.push(...posts);
        } catch (err: unknown) {
            // A page that fails to load is skipped; the others are still processed.
            console.error(err);
        }
    }

    /**
     * The definitive list of posts
     */
    const postList: Array<Post> = [];

    /**
     * The amount of posts to fetch per cycle
     */
    const concurrency = 3;

    // Advance by exactly one batch per iteration so no link is skipped and
    // no index past the end of postLinks is ever requested.
    for (let start = 0; start < postLinks.length; start += concurrency) {
        const batch = postLinks.slice(start, start + concurrency);

        // Wait for all promises of this batch to settle
        const results = await Promise.allSettled(batch.map(link => r34.getPostDetails(link)));

        // Append only the successful results; rejected ones carry no value.
        for (const result of results) {
            if (result.status === "fulfilled") {
                postList.push(result.value);
            } else {
                console.error(`err: `, result.reason);
            }
        }

        const processed = Math.min(start + concurrency, postLinks.length);
        console.log(`[${processed}/${postLinks.length}][${(processed / postLinks.length * 100).toFixed(2)}%] Scrapping...`);
    }

    console.log(`Done!`);

    // writeFile does not create missing parent directories.
    await fs.mkdir(`./export`, { recursive: true });
    await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
    await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
    await fs.writeFile(`./export/r34xxx_postList_example.json`, JSON.stringify(postList, null, 4));

    // Display results
    console.log({
        pageList: pageList.length,
        postLinks: postLinks.length,
        postList: postList.length,
    });
})().catch((err: unknown) => {
    // Anything that escapes the run (e.g. a failed file write) is reported
    // here and marks the process as failed instead of being left unhandled.
    console.error(err);
    process.exitCode = 1;
});
|
|
||||||
|
|
||||||
|
// Running the rule34 test with "Most popular" page.
// The call returns a promise, so a rejection is reported here and marks the
// process as failed instead of being left unhandled.
runRule34XXX(`https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`)
    .catch((err: unknown) => {
        console.error(err);
        process.exitCode = 1;
    });
|
|
@ -0,0 +1,80 @@
|
||||||
|
// This is the test file for the Rule34.xxx module; its test is run from here.
|
||||||
|
import {Rule34xxx} from "../module/rule34xxx";
|
||||||
|
import {Post} from "../type/generic";
|
||||||
|
import * as fs from "fs/promises";
|
||||||
|
|
||||||
|
/**
 * Runs the Rule34.xxx scraping test.
 *
 * Steps:
 *  1. Crawl listing pages starting from `startingPage`.
 *  2. Collect the post links found on every crawled page.
 *  3. Fetch the details of every post, `concurrency` posts at a time.
 *  4. Write the three intermediate lists to JSON files under `./export`
 *     and log how many entries each list contains.
 *
 * Failures while crawling, listing a page or fetching a post are logged and
 * the affected item is skipped; they do not abort the run.
 *
 * @param startingPage URL of the listing page the crawl starts from.
 * @returns A promise that resolves once the JSON files are written and the summary is logged.
 * @throws Rejects if the export directory or any of the JSON files cannot be written.
 */
export async function runTest(startingPage: string = `https://rule34.xxx/index.php?page=post&s=list&tags=sort%3Ascore%3Adesc+id%3A%3E4563063&pid=252`): Promise<void> {
    // Initialize the rule34 module
    const r34: Rule34xxx = new Rule34xxx();
    r34.verbose = true;

    // Crawl the listing pages. Start from an empty list so a failed crawl
    // leaves the rest of the run working on "no pages" instead of throwing
    // on an unassigned variable.
    // NOTE(review): the second argument (20) is presumably a page limit — confirm against Rule34xxx.crawlPages.
    let pageList: Array<string> = [];
    try {
        pageList = await r34.crawlPages(startingPage, 20);
    } catch (err: unknown) {
        console.error(err);
    }

    // Now grab all posts on all of those pages
    const postLinks: Array<string> = [];
    for (const page of pageList) {
        try {
            const posts = await r34.getPostsFromPage(page);
            postLinks.push(...posts);
        } catch (err: unknown) {
            // A page that fails to load is skipped; the others are still processed.
            console.error(err);
        }
    }

    /**
     * The definitive list of posts
     */
    const postList: Array<Post> = [];

    /**
     * The amount of posts to fetch per cycle
     */
    const concurrency = 3;

    // Advance by exactly one batch per iteration so no link is skipped and
    // no index past the end of postLinks is ever requested.
    for (let start = 0; start < postLinks.length; start += concurrency) {
        const batch = postLinks.slice(start, start + concurrency);

        // Wait for all promises of this batch to settle
        const results = await Promise.allSettled(batch.map(link => r34.getPostDetails(link)));

        // Append only the successful results; rejected ones carry no value.
        for (const result of results) {
            if (result.status === "fulfilled") {
                postList.push(result.value);
            } else {
                console.error(`err: `, result.reason);
            }
        }

        const processed = Math.min(start + concurrency, postLinks.length);
        console.log(`[${processed}/${postLinks.length}][${(processed / postLinks.length * 100).toFixed(2)}%] Scrapping...`);
    }

    console.log(`Done!`);

    // writeFile does not create missing parent directories.
    await fs.mkdir(`./export`, { recursive: true });
    await fs.writeFile(`./export/r34xxx_pageList_example.json`, JSON.stringify(pageList, null, 4));
    await fs.writeFile(`./export/r34xxx_postLinks_example.json`, JSON.stringify(postLinks, null, 4));
    await fs.writeFile(`./export/r34xxx_postList_example.json`, JSON.stringify(postList, null, 4));

    // Display results
    console.log({
        pageList: pageList.length,
        postLinks: postLinks.length,
        postList: postList.length,
    });
}
|
Reference in New Issue