Dernière activité 1739740857

scrape-marketplace-listing.js Brut
class Zip {
  // This is a modified version of https://github.com/pwasystem/zip/

  /**
   * Builds an uncompressed ("stored") ZIP archive in memory and offers it to
   * the browser as a download.
   *
   * @param {string} name - Archive name. It is used as the top-level folder
   *   inside the archive and as the download file name (`<name>.zip`).
   */
  constructor(name) {
    this.name = name;
    // Entries keyed by name. A prototype-less object is used instead of an
    // Array so that any file name (e.g. "length") is a safe key.
    this.zip = Object.create(null);
    // Binary parts (headers and data) of the most recently built archive.
    this.file = [];
    // CRC-32 lookup table used by crc32().
    this.o = this.makeo();
  }

  /** Number -> binary string, left-padded with zeros to `size` digits. */
  dec2bin = (dec, size) => dec.toString(2).padStart(size, '0');

  /** String -> plain array of its UTF-8 byte values. */
  str2dec = (str) => Array.from(new TextEncoder().encode(str));

  /** String -> array of two-digit hex strings, one per UTF-8 byte. */
  str2hex = (str) => [...new TextEncoder().encode(str)].map((x) => x.toString(16).padStart(2, '0'));

  /**
   * Space-separated hex bytes -> Uint8Array. Empty tokens are skipped, so an
   * empty string yields an empty buffer instead of a stray zero byte.
   */
  hex2buf = (hex) => {
    const tokens = hex.split(' ').filter((token) => token !== '');
    return Uint8Array.from(tokens, (token) => Number.parseInt(token, 16));
  };

  /** 16-digit binary string -> two hex bytes, low byte first. */
  bin2hex = (bin) => {
    const low = Number.parseInt(bin.slice(8), 2).toString(16).padStart(2, '0');
    const high = Number.parseInt(bin.slice(0, 8), 2).toString(16).padStart(2, '0');
    return `${low} ${high}`;
  };

  /** Even-length hex string -> its bytes in reverse order, space-separated. */
  reverse = (hex) => {
    const pairs = [];
    for (let i = 0; i < hex.length; i += 2) {
      pairs.push(hex.slice(i, i + 2));
    }
    return pairs.reverse().join(' ');
  };

  /** Builds the 256-entry CRC-32 table (reflected polynomial 0xEDB88320). */
  makeo = () => {
    const table = [];
    for (let c = 0; c < 256; c++) {
      let a = c;
      for (let bit = 0; bit < 8; bit++) {
        a = 1 & a ? 3988292384 ^ (a >>> 1) : a >>> 1;
      }
      table[c] = a;
    }
    return table;
  };

  /**
   * CRC-32 of a byte sequence.
   *
   * @param {ArrayLike<number>} r - Byte values.
   * @returns {string} The checksum as four space-separated hex bytes, low byte first.
   */
  crc32 = (r) => {
    let crc = -1;
    for (let i = 0; i < r.length; i++) {
      crc = (crc >>> 8) ^ this.o[255 & (crc ^ r[i])];
    }
    return this.reverse(((-1 ^ crc) >>> 0).toString(16).padStart(8, '0'));
  };

  /**
   * Extracts the last path segment of a URL, ignoring any query string or
   * fragment. Works whether or not the URL contains a `?`.
   *
   * @param {string} fileUrl
   * @returns {string}
   */
  fileNameFromUrl = (fileUrl) => {
    const path = String(fileUrl).split(/[?#]/, 1)[0];
    return path.slice(path.lastIndexOf('/') + 1);
  };

  /**
   * Encodes a modification time as the four header bytes (time, then date)
   * of the MS-DOS format used by ZIP.
   *
   * A missing or unparsable time falls back to the current time. Dates outside
   * the representable 1980-2107 range are clamped, because the year is stored
   * as a 7-bit offset from 1980.
   *
   * @param {Date|number|string|null|undefined} modTime
   * @returns {string} Four space-separated hex bytes.
   */
  dosDateTime = (modTime) => {
    let when = modTime == null ? new Date() : new Date(modTime);
    if (Number.isNaN(when.getTime())) {
      when = new Date();
    }
    if (when.getFullYear() < 1980) {
      when = new Date(1980, 0, 1, 0, 0, 0);
    } else if (when.getFullYear() > 2107) {
      when = new Date(2107, 11, 31, 23, 59, 58);
    }
    const hour = this.dec2bin(when.getHours(), 5);
    const minutes = this.dec2bin(when.getMinutes(), 6);
    // Two-second resolution. Rounding down keeps the field in 0..29;
    // rounding to nearest would turn 59 s into the invalid value 30.
    const seconds = this.dec2bin(Math.floor(when.getSeconds() / 2), 5);
    const year = this.dec2bin(when.getFullYear() - 1980, 7);
    const month = this.dec2bin(when.getMonth() + 1, 4);
    const day = this.dec2bin(when.getDate(), 5);
    return `${this.bin2hex(`${hour}${minutes}${seconds}`)} ${this.bin2hex(`${year}${month}${day}`)}`;
  };

  /**
   * Downloads one URL and stores it as an entry keyed by its file name.
   *
   * @param {string} fileUrl
   * @param {string} [folder=''] - Sub-folder prefix (include the trailing `/`).
   * @returns {Promise<void>} Rejects on network failure or a non-OK status.
   */
  async fetchEntry(fileUrl, folder = '') {
    const response = await fetch(fileUrl);
    if (!response.ok) {
      // Do not archive an error page under the file's name.
      throw new Error(`HTTP ${response.status} for ${fileUrl}`);
    }
    const buffer = await response.arrayBuffer();
    const fileName = this.fileNameFromUrl(fileUrl);
    console.log(`File: ${fileName} from ${fileUrl} load`);
    const bytes = Array.from(new Uint8Array(buffer));
    bytes.modTime = response.headers.get('Last-Modified');
    bytes.fileUrl = `${this.name}/${folder}${fileName}`;
    this.zip[fileName] = bytes;
  }

  /**
   * Fetches every URL and adds each response body to the archive.
   *
   * Failed downloads are logged and skipped so that one bad URL does not
   * prevent the rest of the archive from being produced.
   *
   * @param {Iterable<string>|ArrayLike<string>} filesArray - URLs to download.
   * @param {string} [folder=''] - Sub-folder prefix (include the trailing `/`).
   * @param {boolean} [download=false] - Build and download the archive once
   *   every request has settled.
   * @returns {Promise<void>} Resolves after all requests have settled (and
   *   the download was triggered, when requested).
   */
  async fetch2zip(filesArray, folder = '', download = false) {
    const urls = Array.from(filesArray);
    const outcomes = await Promise.allSettled(urls.map((fileUrl) => this.fetchEntry(fileUrl, folder)));
    outcomes.forEach((outcome, index) => {
      if (outcome.status === 'rejected') {
        console.warn(`File: ${urls[index]} could not be loaded`, outcome.reason);
      }
    });
    if (download) {
      this.makeZip();
    }
  }

  /**
   * Adds a text entry to the archive, stamped with the current time.
   *
   * @param {string} name - Entry file name (also the entry's key).
   * @param {string} str - Text content, stored as UTF-8.
   * @param {string} [folder=''] - Sub-folder prefix (include the trailing `/`).
   */
  str2zip(name, str, folder = '') {
    const bytes = this.str2dec(str);
    bytes.name = name;
    bytes.modTime = new Date();
    bytes.fileUrl = `${this.name}/${folder}${name}`;
    this.zip[name] = bytes;
  }

  /**
   * Adds File/Blob-like objects (anything exposing `arrayBuffer()`, `name`
   * and `lastModified`) to the archive, keyed by their path in the archive.
   *
   * @param {Iterable<File>|ArrayLike<File>} files
   * @param {string} [folder=''] - Sub-folder prefix (include the trailing `/`).
   * @returns {Promise<void[]>} Resolves once every file has been read.
   */
  files2zip(files, folder = '') {
    const tasks = Array.from(files, async (file) => {
      const bytes = Array.from(new Uint8Array(await file.arrayBuffer()));
      bytes.name = file.name;
      bytes.modTime = file.lastModified;
      bytes.fileUrl = `${this.name}/${folder}${file.name}`;
      this.zip[bytes.fileUrl] = bytes;
    });
    return Promise.all(tasks);
  }

  /**
   * Serialises all entries into ZIP format (no compression) and triggers a
   * browser download of `<name>.zip`.
   *
   * The binary parts are rebuilt from scratch on every call, so calling this
   * more than once never duplicates data in `this.file`.
   */
  makeZip() {
    this.file = [];
    let count = 0;
    let centralDirectoryHex = '';
    // Byte offset of the next local header; after the loop, the offset at
    // which the central directory starts.
    let offset = 0;

    for (const entry of Object.values(this.zip)) {
      const stamp = this.dosDateTime(entry.modTime);
      const crc = this.crc32(entry);
      const size = this.reverse(entry.length.toString(16).padStart(8, '0'));
      const nameBytes = new TextEncoder().encode(entry.fileUrl);
      const nameHex = Array.from(nameBytes, (b) => b.toString(16).padStart(2, '0')).join(' ');
      const nameSize = this.reverse(nameBytes.length.toString(16).padStart(4, '0'));
      // Names are written as UTF-8; general-purpose bit 11 tells extractors
      // so. It is only set when the name actually contains non-ASCII bytes.
      const flags = nameBytes.some((b) => b > 0x7f) ? '00 08' : '00 00';
      const localOffset = this.reverse(offset.toString(16).padStart(8, '0'));

      const localHeader = this.hex2buf(
        `50 4B 03 04 14 00 ${flags} 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 ${nameHex}`,
      );
      centralDirectoryHex += `50 4B 01 02 14 00 14 00 ${flags} 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 00 00 00 00 01 00 20 00 00 00 ${localOffset} ${nameHex} `;

      this.file.push(localHeader, new Uint8Array(entry));
      offset += localHeader.length + entry.length;
      count++;
    }

    const centralDirectoryBuffer = this.hex2buf(centralDirectoryHex);
    const entries = this.reverse(count.toString(16).padStart(4, '0'));
    // Measure the real buffer so that an empty archive reports size 0.
    const dirSize = this.reverse(centralDirectoryBuffer.length.toString(16).padStart(8, '0'));
    const dirInit = this.reverse(offset.toString(16).padStart(8, '0'));
    const endOfCentralDirectory = `50 4b 05 06 00 00 00 00 ${entries} ${entries} ${dirSize} ${dirInit} 00 00`;

    this.file.push(centralDirectoryBuffer, this.hex2buf(endOfCentralDirectory));

    const a = document.createElement('a');
    a.href = URL.createObjectURL(new Blob(this.file, { type: 'application/octet-stream' }));
    console.log(a.href);
    a.download = `${this.name}.zip`;
    a.click();
  }
}
129
130var filterList = [
131 "",
132 "Save",
133 "Share",
134 "Details",
135 "Send",
136 "Send seller a message",
137 "Condition",
138 "Location is approximate",
139 "Seller information Seller details",
140 "Seller information",
141 "Seller details",
142 "Message"
143];
144var filterSet = new Set(filterList);
145
/**
 * Collects the text of every element matching `selector`, drops texts listed
 * in `filterSet`, removes duplicates (first occurrence wins) and joins the
 * remainder with newlines.
 *
 * @param {string} selector - CSS selector passed to document.querySelectorAll.
 * @returns {string} The remaining texts, one per line.
 */
var getDescription = (selector) => {
  const keptTexts = new Set();
  for (const element of Array.from(document.querySelectorAll(selector))) {
    const text = element.textContent;
    if (filterSet.has(text)) {
      continue;
    }
    keptTexts.add(text);
  }
  return [...keptTexts].join('\n');
};
149
/**
 * Returns the `src` of every element matching `selector`, in document order.
 *
 * @param {string} selector - CSS selector passed to document.querySelectorAll.
 * @returns {string[]} One `src` value per matched element.
 */
var getImages = (selector) => {
  const matches = document.querySelectorAll(selector);
  return Array.from(matches, (image) => image.src);
};
153
// Archive name derived from the page title: spaces become underscores, every
// character outside [_a-zA-Z0-9] is removed, then the first 'Marketplace__'
// and '__Facebook' markers are stripped.
var title = document.title.trim().replaceAll(' ', '_').replace(/[^_a-zA-Z0-9]/g, '').replace('Marketplace__', '').replace('__Facebook', '');

var z = new Zip(title);
// Might have to update the selector argument to getDescription and getImages!
z.str2zip('description.txt', getDescription('div.xzepove:nth-child(1) > div:nth-child(1) > div:nth-child(1) span'));
// Arguments are positional: folder = '', download = true. JavaScript has no
// keyword arguments; writing `folder=''` here would assign to an undeclared
// variable (a leaked global, or a ReferenceError in strict mode).
z.fetch2zip(getImages('div.xh8yej3:nth-child(3) img'), '', true);