scrape-marketplace-listing.js
· 6.5 KiB · JavaScript
原始檔案
class Zip {
  // This is a modified version of https://github.com/pwasystem/zip/

  /**
   * Minimal store-only (no compression) ZIP writer.
   *
   * Entries are collected in `this.zip`; `buildZip()` serialises them and
   * `makeZip()` additionally triggers a browser download of the result.
   * Sizes and offsets are written as 32-bit fields (no ZIP64 support).
   *
   * @param {string} name - Archive name. Used as the download file name
   *   (`<name>.zip`) and as the root folder of every entry.
   */
  constructor(name) {
    this.name = name;
    // Entry table: key -> plain byte array decorated with `modTime` and
    // `fileUrl` (path inside the archive). A prototype-less object is used so
    // that keys such as "length" cannot collide with built-in properties.
    this.zip = Object.create(null);
    // Byte chunks of the most recently built archive.
    this.file = [];
    // CRC-32 lookup table.
    this.o = this.makeo();
  }

  /** Binary string of `dec`, left-padded with zeros to `size` digits. */
  dec2bin = (dec, size) => dec.toString(2).padStart(size, '0');

  /** UTF-8 bytes of `str` as a plain array of numbers. */
  str2dec = (str) => Array.from(new TextEncoder().encode(str));

  /** UTF-8 bytes of `str` as an array of two-digit hex strings. */
  str2hex = (str) => [...new TextEncoder().encode(str)].map((x) => x.toString(16).padStart(2, '0'));

  /**
   * Parses space-separated hex bytes into a Uint8Array.
   * Empty tokens are skipped so that '' yields zero bytes instead of a
   * spurious 0x00 (parseInt('') is NaN, which Uint8Array stores as 0).
   */
  hex2buf = (hex) => new Uint8Array(hex.split(' ').filter((x) => x !== '').map((x) => parseInt(x, 16)));

  /** Converts a 16-digit binary string to two hex bytes, low byte first. */
  bin2hex = (bin) => `${parseInt(bin.slice(8), 2).toString(16).padStart(2, '0')} ${parseInt(bin.slice(0, 8), 2).toString(16).padStart(2, '0')}`;

  /** Splits a hex string into byte pairs and returns them in reverse (little-endian) order, space-separated. */
  reverse = (hex) => {
    const pairs = [];
    for (let i = 0; i < hex.length; i += 2) {
      pairs.push(hex.slice(i, i + 2));
    }
    return pairs.reverse().join(' ');
  };

  /** Builds the 256-entry CRC-32 table (reflected polynomial 0xEDB88320). */
  makeo = () => {
    const table = [];
    for (let c = 0; c < 256; c++) {
      let a = c;
      for (let f = 0; f < 8; f++) {
        a = 1 & a ? 3988292384 ^ (a >>> 1) : a >>> 1;
      }
      table[c] = a;
    }
    return table;
  };

  /**
   * CRC-32 of a byte sequence.
   * @param {ArrayLike<number>} r - Bytes to checksum.
   * @returns {string} Four space-separated hex bytes, little-endian.
   */
  crc32 = (r) => {
    let n = -1;
    for (let t = 0; t < r.length; t++) {
      n = (n >>> 8) ^ this.o[255 & (n ^ r[t])];
    }
    return this.reverse(((-1 ^ n) >>> 0).toString(16).padStart(8, '0'));
  };

  /**
   * Extracts the last path segment of a URL, ignoring query string and
   * fragment. Works whether or not the URL contains a '?'.
   *
   * @param {string} fileUrl - URL to inspect.
   * @param {string} [fallback='file'] - Returned when the path has no last segment.
   * @returns {string} File name taken from the URL path.
   */
  urlFileName(fileUrl, fallback = 'file') {
    const path = String(fileUrl).split(/[?#]/, 1)[0];
    const fileName = path.substring(path.lastIndexOf('/') + 1);
    return fileName === '' ? fallback : fileName;
  }

  /**
   * Encodes a timestamp as MS-DOS time and date fields.
   * Unparseable values fall back to the current time, and years outside the
   * representable 1980-2107 range are clamped, because the year is stored
   * in 7 bits as an offset from 1980.
   *
   * @param {Date|string|number} value - Anything accepted by `new Date()`.
   * @returns {string} Four space-separated hex bytes: time (LE) then date (LE).
   */
  dosDateTime(value) {
    let date = new Date(value);
    if (Number.isNaN(date.getTime())) {
      date = new Date();
    }
    if (date.getFullYear() < 1980) {
      date = new Date(1980, 0, 1, 0, 0, 0);
    } else if (date.getFullYear() > 2107) {
      date = new Date(2107, 11, 31, 23, 59, 58);
    }
    const hour = this.dec2bin(date.getHours(), 5);
    const minutes = this.dec2bin(date.getMinutes(), 6);
    // Seconds are stored in 2-second units; floor keeps second 59 at 29
    // (58 s) instead of rounding up to an invalid 60 s.
    const seconds = this.dec2bin(Math.floor(date.getSeconds() / 2), 5);
    const year = this.dec2bin(date.getFullYear() - 1980, 7);
    const month = this.dec2bin(date.getMonth() + 1, 4);
    const day = this.dec2bin(date.getDate(), 5);
    return `${this.bin2hex(`${hour}${minutes}${seconds}`)} ${this.bin2hex(`${year}${month}${day}`)}`;
  }

  /**
   * Downloads every URL and adds the response bodies as entries, keyed by
   * the file name taken from the URL path.
   *
   * A URL that cannot be fetched is logged and skipped, so one failure does
   * not prevent the archive from being produced.
   *
   * @param {string[]} filesArray - URLs to fetch.
   * @param {string} [folder=''] - Folder prefix inside the archive (include the trailing '/').
   * @param {boolean} [download=false] - Call `makeZip()` once all fetches have settled.
   * @returns {Promise<void>} Settles after all fetches (and the optional download) are done.
   */
  fetch2zip(filesArray, folder = '', download = false) {
    const tasks = filesArray.map(async (fileUrl, index) => {
      try {
        const response = await fetch(fileUrl);
        if (!response.ok) {
          throw new Error(`HTTP ${response.status}`);
        }
        const buffer = await response.arrayBuffer();
        const fileName = this.urlFileName(fileUrl, `file-${index}`);
        console.log(`File: ${fileName} from ${fileUrl} load`);
        const uint = Array.from(new Uint8Array(buffer));
        uint.name = fileName;
        // The header is optional; fall back to "now" rather than the epoch.
        uint.modTime = response.headers.get('Last-Modified') ?? new Date();
        uint.fileUrl = `${this.name}/${folder}${fileName}`;
        this.zip[fileName] = uint;
      } catch (err) {
        console.error(`File: ${fileUrl} could not be loaded`, err);
      }
    });
    return Promise.all(tasks).then(() => {
      if (download) {
        this.makeZip();
      }
    });
  }

  /**
   * Adds a text entry (UTF-8 encoded), stamped with the current time.
   *
   * @param {string} name - Entry file name; also the key in `this.zip`.
   * @param {string} str - Text content.
   * @param {string} [folder=''] - Folder prefix inside the archive (include the trailing '/').
   */
  str2zip(name, str, folder = '') {
    const uint = this.str2dec(str);
    uint.name = name;
    uint.modTime = new Date();
    uint.fileUrl = `${this.name}/${folder}${name}`;
    this.zip[name] = uint;
  }

  /**
   * Adds objects exposing `arrayBuffer()`, `name` and `lastModified`
   * (e.g. the files of an `<input type="file">`), keyed by archive path.
   *
   * @param {ArrayLike<File>} files - Files to add.
   * @param {string} [folder=''] - Folder prefix inside the archive (include the trailing '/').
   * @returns {Promise<void>} Resolves once every file has been read.
   */
  files2zip(files, folder = '') {
    const tasks = Array.from(files, async (file) => {
      const data = await file.arrayBuffer();
      const uint = Array.from(new Uint8Array(data));
      uint.name = file.name;
      uint.modTime = file.lastModified;
      uint.fileUrl = `${this.name}/${folder}${file.name}`;
      this.zip[uint.fileUrl] = uint;
    });
    return Promise.all(tasks).then(() => undefined);
  }

  /**
   * Serialises all entries into ZIP byte chunks: for each entry a local file
   * header followed by its data, then the central directory and the
   * end-of-central-directory record.
   *
   * The result replaces `this.file`, so repeated calls do not append to a
   * previously built archive.
   *
   * @returns {Uint8Array[]} Chunks that form the archive when concatenated.
   */
  buildZip() {
    const zip = this.zip;
    const parts = [];
    let count = 0;
    let centralDirectoryFileHeader = '';
    let directoryInit = 0; // running offset; ends up as the central directory start
    let offSetLocalHeader = '00 00 00 00';
    for (const name in zip) {
      const entry = zip[name];
      const stamp = this.dosDateTime(entry.modTime);
      const crc = this.crc32(entry);
      const size = this.reverse(entry.length.toString(16).padStart(8, '0'));
      const nameHex = this.str2hex(entry.fileUrl);
      const nameFile = nameHex.join(' ');
      const nameSize = this.reverse(nameHex.length.toString(16).padStart(4, '0'));
      // General-purpose flag "00 08" sets bit 11: the name is UTF-8 encoded.
      const fileHeaderBuffer = this.hex2buf(
        `50 4B 03 04 14 00 00 08 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 ${nameFile}`,
      );
      // The central record points at this entry's local header, i.e. the
      // offset accumulated before this entry is added.
      centralDirectoryFileHeader += `50 4B 01 02 14 00 14 00 00 08 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 00 00 00 00 01 00 20 00 00 00 ${offSetLocalHeader} ${nameFile} `;
      directoryInit += fileHeaderBuffer.length + entry.length;
      offSetLocalHeader = this.reverse(directoryInit.toString(16).padStart(8, '0'));
      parts.push(fileHeaderBuffer, new Uint8Array(entry));
      count++;
    }
    const centralDirectoryBuffer = this.hex2buf(centralDirectoryFileHeader);
    const entries = this.reverse(count.toString(16).padStart(4, '0'));
    const dirSize = this.reverse(centralDirectoryBuffer.length.toString(16).padStart(8, '0'));
    const dirInit = this.reverse(directoryInit.toString(16).padStart(8, '0'));
    const centralDirectory = `50 4b 05 06 00 00 00 00 ${entries} ${entries} ${dirSize} ${dirInit} 00 00`;
    parts.push(centralDirectoryBuffer, this.hex2buf(centralDirectory));
    this.file = parts;
    return parts;
  }

  /**
   * Builds the archive and starts a browser download named `<name>.zip`
   * by clicking a temporary anchor that points at a Blob URL.
   */
  makeZip() {
    const parts = this.buildZip();
    const a = document.createElement('a');
    a.href = URL.createObjectURL(new Blob(parts, { type: 'application/octet-stream' }));
    console.log(a.href);
    a.download = `${this.name}.zip`;
    a.click();
  }
}
// Texts that are dropped from the scraped description. Matching is done on
// the exact, whitespace-trimmed text of each element.
var filterList = [
  "",
  "Save",
  "Share",
  "Details",
  "Send",
  "Send seller a message",
  "Condition",
  "Location is approximate",
  "Seller information Seller details",
  "Seller information",
  "Seller details",
  "Message"
];
var filterSet = new Set(filterList);

/**
 * Collects the text of every element matching `selector`, removes entries
 * listed in `filterSet`, de-duplicates while keeping first-seen order, and
 * joins the remainder with newlines.
 *
 * Text is trimmed before filtering so that whitespace-only elements and
 * labels padded with whitespace are also dropped.
 *
 * @param {string} selector - CSS selector passed to `document.querySelectorAll`.
 * @returns {string} One remaining text per line.
 */
var getDescription = (selector) => {
  const texts = Array.from(document.querySelectorAll(selector), (node) => node.textContent.trim());
  const kept = texts.filter((text) => !filterSet.has(text));
  return [...new Set(kept)].join('\n');
};
/**
 * Returns the distinct, non-empty `src` values of the elements matching
 * `selector`, in document order.
 *
 * Empty values are skipped because fetching '' would request the current
 * page instead of an image.
 *
 * @param {string} selector - CSS selector passed to `document.querySelectorAll`.
 * @returns {string[]} Image URLs.
 */
var getImages = (selector) => {
  const sources = Array.from(document.querySelectorAll(selector), (img) => img.src);
  return [...new Set(sources.filter((src) => Boolean(src)))];
};
// Derive the archive name from the page title: spaces become underscores,
// every other character outside [_a-zA-Z0-9] is removed, and the
// "Marketplace__" / "__Facebook" remnants of the title are stripped.
// Falls back to "listing" so the archive is never named just ".zip".
var title = document.title.trim().replaceAll(' ', '_').replace(/[^_a-zA-Z0-9]/g, '').replace('Marketplace__', '').replace('__Facebook', '') || 'listing';
var z = new Zip(title);
// Might have to update the selector argument to getDescription and getImages!
z.str2zip('description.txt', getDescription('div.xzepove:nth-child(1) > div:nth-child(1) > div:nth-child(1) span'));
// Arguments are positional (filesArray, folder, download). Writing
// `folder=''` / `download=true` here would assign implicit globals.
z.fetch2zip(getImages('div.xh8yej3:nth-child(3) img'), '', true);
| 1 | class Zip { |
| 2 | // This is a modified version of https://github.com/pwasystem/zip/ |
| 3 | |
| 4 | constructor(name) { |
| 5 | this.name = name; |
| 6 | this.zip = new Array(); |
| 7 | this.file = new Array(); |
| 8 | this.o = this.makeo(); |
| 9 | } |
| 10 | |
| 11 | dec2bin=(dec,size)=>dec.toString(2).padStart(size,'0'); |
| 12 | str2dec=str=>Array.from(new TextEncoder().encode(str)); |
| 13 | str2hex=str=>[...new TextEncoder().encode(str)].map(x=>x.toString(16).padStart(2,'0')); |
| 14 | hex2buf=hex=>new Uint8Array(hex.split(' ').map(x=>parseInt(x,16))); |
| 15 | bin2hex=bin=>(parseInt(bin.slice(8),2).toString(16).padStart(2,'0')+' '+parseInt(bin.slice(0,8),2).toString(16).padStart(2,'0')); |
| 16 | |
| 17 | reverse=hex=>{ |
| 18 | let hexArray=new Array(); |
| 19 | for(let i=0;i<hex.length;i=i+2)hexArray[i]=hex[i]+''+hex[i+1]; |
| 20 | return hexArray.filter((a)=>a).reverse().join(' '); |
| 21 | } |
| 22 | |
| 23 | makeo=()=>{ |
| 24 | for(var a,o=[],c=0;c<256;c++){ |
| 25 | a=c; |
| 26 | for(var f=0;f<8;f++)a=1&a?3988292384^a>>>1:a>>>1; |
| 27 | o[c]=a; |
| 28 | } |
| 29 | return o; |
| 30 | } |
| 31 | |
| 32 | crc32=r=>{ |
| 33 | for(var n=-1,t=0;t<r.length;t++)n=n>>>8^this.o[255&(n^r[t])]; |
| 34 | return this.reverse(((-1^n)>>>0).toString(16).padStart(8,'0')); |
| 35 | } |
| 36 | |
| 37 | fetch2zip(filesArray,folder='', download=false){ |
| 38 | var counter = filesArray.length; |
| 39 | filesArray.forEach(fileUrl=>{ |
| 40 | let resp; |
| 41 | fetch(fileUrl).then(response=>{ |
| 42 | resp=response; |
| 43 | return response.arrayBuffer(); |
| 44 | }).then(blob=>{ |
| 45 | new Response(blob).arrayBuffer().then(buffer=>{ |
| 46 | let fileName = fileUrl.substring(fileUrl.lastIndexOf('/') + 1, fileUrl.indexOf('?')); |
| 47 | console.log(`File: ${fileName} from ${fileUrl} load`); |
| 48 | let uint=[...new Uint8Array(buffer)]; |
| 49 | uint.modTime=resp.headers.get('Last-Modified'); |
| 50 | uint.fileUrl=`${this.name}/${fileName}`; |
| 51 | this.zip[fileName]=uint; |
| 52 | counter--; |
| 53 | if (download && counter == 0) { |
| 54 | this.makeZip(); |
| 55 | } |
| 56 | }); |
| 57 | }); |
| 58 | }); |
| 59 | } |
| 60 | |
| 61 | str2zip(name,str,folder=''){ |
| 62 | let uint=[...new Uint8Array(this.str2dec(str))]; |
| 63 | uint.name=name; |
| 64 | uint.modTime=new Date(); |
| 65 | uint.fileUrl=`${this.name}/${folder}${name}`; |
| 66 | this.zip[name]=uint; |
| 67 | } |
| 68 | |
| 69 | files2zip(files,folder=''){ |
| 70 | for(let i=0;i<files.length;i++){ |
| 71 | files[i].arrayBuffer().then(data=>{ |
| 72 | let uint=[...new Uint8Array(data)]; |
| 73 | uint.name=files[i].name; |
| 74 | uint.modTime=files[i].lastModified; |
| 75 | uint.fileUrl=`${this.name}/${folder}${files[i].name}`; |
| 76 | this.zip[uint.fileUrl]=uint; |
| 77 | }); |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | makeZip(){ |
| 82 | let count=0; |
| 83 | let fileHeader=''; |
| 84 | let centralDirectoryFileHeader=''; |
| 85 | let directoryInit=0; |
| 86 | let offSetLocalHeader='00 00 00 00'; |
| 87 | let zip=this.zip; |
| 88 | for(const name in zip){ |
| 89 | let lastMod, hour, minutes, seconds, year, month, day; |
| 90 | let modTime=()=>{ |
| 91 | lastMod=new Date(zip[name].modTime); |
| 92 | hour=this.dec2bin(lastMod.getHours(),5); |
| 93 | minutes=this.dec2bin(lastMod.getMinutes(),6); |
| 94 | seconds=this.dec2bin(Math.round(lastMod.getSeconds()/2),5); |
| 95 | year=this.dec2bin(lastMod.getFullYear()-1980,7); |
| 96 | month=this.dec2bin(lastMod.getMonth()+1,4); |
| 97 | day=this.dec2bin(lastMod.getDate(),5); |
| 98 | return this.bin2hex(`${hour}${minutes}${seconds}`)+' '+this.bin2hex(`${year}${month}${day}`); |
| 99 | } |
| 100 | let crc=this.crc32(zip[name]); |
| 101 | let size=this.reverse(parseInt(zip[name].length).toString(16).padStart(8,'0')); |
| 102 | let nameFile=this.str2hex(zip[name].fileUrl).join(' '); |
| 103 | let nameBytes = new TextEncoder().encode(zip[name].fileUrl); |
| 104 | let nameSize = this.reverse(nameBytes.length.toString(16).padStart(4, '0')); |
| 105 | let fileHeader=`50 4B 03 04 14 00 00 00 00 00 ${modTime()} ${crc} ${size} ${size} ${nameSize} 00 00 ${nameFile}`; |
| 106 | let fileHeaderBuffer=this.hex2buf(fileHeader); |
| 107 | directoryInit=directoryInit+fileHeaderBuffer.length+zip[name].length; |
| 108 | centralDirectoryFileHeader=`${centralDirectoryFileHeader}50 4B 01 02 14 00 14 00 00 00 00 00 ${modTime()} ${crc} ${size} ${size} ${nameSize} 00 00 00 00 00 00 01 00 20 00 00 00 ${offSetLocalHeader} ${nameFile} `; |
| 109 | offSetLocalHeader=this.reverse(directoryInit.toString(16).padStart(8,'0')); |
| 110 | this.file.push(fileHeaderBuffer,new Uint8Array(zip[name])); |
| 111 | count++; |
| 112 | } |
| 113 | centralDirectoryFileHeader=centralDirectoryFileHeader.trim(); |
| 114 | let entries=this.reverse(count.toString(16).padStart(4,'0')); |
| 115 | let dirSize=this.reverse(centralDirectoryFileHeader.split(' ').length.toString(16).padStart(8,'0')); |
| 116 | let dirInit=this.reverse(directoryInit.toString(16).padStart(8,'0')); |
| 117 | let centralDirectory=`50 4b 05 06 00 00 00 00 ${entries} ${entries} ${dirSize} ${dirInit} 00 00`; |
| 118 | |
| 119 | |
| 120 | this.file.push(this.hex2buf(centralDirectoryFileHeader),this.hex2buf(centralDirectory)); |
| 121 | |
| 122 | let a = document.createElement('a'); |
| 123 | a.href = URL.createObjectURL(new Blob([...this.file],{type:'application/octet-stream'})); |
| 124 | console.log(a.href) |
| 125 | a.download = `${this.name}.zip`; |
| 126 | a.click(); |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | var filterList = [ |
| 131 | "", |
| 132 | "Save", |
| 133 | "Share", |
| 134 | "Details", |
| 135 | "Send", |
| 136 | "Send seller a message", |
| 137 | "Condition", |
| 138 | "Location is approximate", |
| 139 | "Seller information Seller details", |
| 140 | "Seller information", |
| 141 | "Seller details", |
| 142 | "Message" |
| 143 | ]; |
| 144 | var filterSet = new Set(filterList); |
| 145 | |
| 146 | var getDescription = (selector) => { |
| 147 | return Array.from(new Set(Array.from(document.querySelectorAll(selector)).map(i => i.textContent ).filter(i => !filterSet.has(i)))).join('\n'); |
| 148 | }; |
| 149 | |
| 150 | var getImages = (selector) => { |
| 151 | return Array.from(document.querySelectorAll(selector)).map(i => i.src ); |
| 152 | }; |
| 153 | |
| 154 | var title = document.title.trim().replaceAll(' ', '_').replace(/[^_a-zA-Z0-9]/g, '').replace('Marketplace__', '').replace('__Facebook', ''); |
| 155 | |
| 156 | var z = new Zip(title); |
| 157 | // Might have to update the selector argument to getDescription and getImages! |
| 158 | z.str2zip('description.txt', getDescription('div.xzepove:nth-child(1) > div:nth-child(1) > div:nth-child(1) span')); |
| 159 | z.fetch2zip(getImages('div.xh8yej3:nth-child(3) img'), folder='', download=true); |