Almost finished with the rule34.xxx module
+ rule34xxx GetPostDetails
This commit is contained in:
parent
2ed81fb668
commit
fe2372dc97
|
@ -21,10 +21,120 @@ export class Rule34xxx extends Scrapper {
|
|||
throw new Error(`Invalid url provided`);
|
||||
}
|
||||
|
||||
// Init the variable here in case of an error
|
||||
let pageContents = null;
|
||||
|
||||
// Send out the request to grab the contents of the post
|
||||
try {
|
||||
// Send out the initial Axios request to fetch the data from the page
|
||||
pageContents = await getPageContents(url);
|
||||
|
||||
if (pageContents.status < 200 || pageContents.status > 299) {
|
||||
throw new Error(`Invalid response code[${pageContents.status}]`);
|
||||
}
|
||||
|
||||
return null;
|
||||
pageContents = pageContents.data;
|
||||
|
||||
} catch (err) {
|
||||
// Log a marker when verbose, then rethrow so the caller can handle the error
|
||||
if (this.verbose) {
|
||||
console.error(`[Error]::getPostDetails::`);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Process the page's posts with cheerio
|
||||
const $ = cheerio.load((pageContents as string));
|
||||
|
||||
const postTags: Array<Tag> = [];
|
||||
{ // Get the post's tags
|
||||
const tagsSection = $(`#tag-sidebar`);
|
||||
if (tagsSection.length <= 0) {
|
||||
throw new Error(`Failed to find post tags, invalid post`);
|
||||
}
|
||||
|
||||
// Run a query for all tags
|
||||
tagsSection.find(`.tag`).each(function () {
|
||||
// Go through the classes of the tag and see if we find the type
|
||||
let tagType = "general";
|
||||
const classList = $(this).attr(`class`).split(" ");
|
||||
if (classList.length > 0) {
|
||||
for (let tt of classList) {
|
||||
if (tt.includes(`tag-type-`)) {
|
||||
tagType = tt.split(`-type-`)[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const tagAnchor = $(this).find(`a`);
|
||||
let tagName = `unknown`;
|
||||
let tagSlug = `unknown`;
|
||||
{ // Get the name of the tag and slug
|
||||
if (tagAnchor.length > 0) {
|
||||
tagName = tagAnchor.text();
|
||||
tagSlug = tagAnchor.attr(`href`).split("tags=")[1];
|
||||
}
|
||||
}
|
||||
|
||||
// Add the tag to the postTags listing
|
||||
postTags.push({
|
||||
slug: tagSlug ?? `unknown`,
|
||||
type: tagType ?? `general`
|
||||
});
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
let postContent = `ERROR`;
|
||||
{ // get the link to the post's original image/video
|
||||
const imageLink = $(`meta[property="og:image"]`);
|
||||
|
||||
if (imageLink.length > 0) {
|
||||
postContent = imageLink.attr(`content`);
|
||||
}
|
||||
|
||||
// Fall back to the on-page image when og:image was not found or merely links back to the post URL
|
||||
if (postContent == `ERROR` || postContent == url) {
|
||||
// Get the content URL from the image shown on the current page
|
||||
postContent = $(`#fit-to-screen img`).attr(`src`);
|
||||
}
|
||||
|
||||
if (postContent.indexOf(`?`) >= 5) {
|
||||
postContent = postContent.split(`?`)[0];
|
||||
}
|
||||
|
||||
if (postContent.indexOf(`//`) >= 0) {
|
||||
postContent = postContent;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Get the source of the post
|
||||
let postSource = null;
|
||||
{
|
||||
const sourceA = $(`#stats a[rel="nofollow"]`);
|
||||
if (sourceA.length > 0) {
|
||||
postSource = sourceA.attr(`href`);
|
||||
}
|
||||
}
|
||||
|
||||
let postDate = "2021-10-17 13:18:27";
|
||||
{
|
||||
const postDateRef = $(`#stats li:nth-child(2)`);
|
||||
if (postDateRef.length > 0) {
|
||||
postDate = postDateRef.text().split("\n")[1].replace(/Posted: /g, '');
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
url: url,
|
||||
contentURL: postContent ?? "ERROR",
|
||||
source: postSource,
|
||||
tags: postTags ?? [],
|
||||
ts: postDate,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -8,8 +8,8 @@ import {Post} from "./type/generic";
|
|||
r34.verbose = true;
|
||||
|
||||
// Run the getPostDetails function
|
||||
let postDetails: Array<string>;
|
||||
await r34.crawlPages(`https://rule34.xxx/index.php?page=post&s=list&tags=thelorope`, 35)
|
||||
let postDetails: any;
|
||||
await r34.getPostDetails(`https://rule34.xxx/index.php?page=post&s=view&id=5203781`)
|
||||
.then( postData => {
|
||||
postDetails = postData;
|
||||
})
|
||||
|
|
Reference in New Issue