Последняя активность 1739740857

draggor ревизий этого фрагмента 1739740857. К ревизии

1 file changed, 159 insertions

scrape-marketplace-listing.js(файл создан)

@@ -0,0 +1,159 @@
/**
 * Minimal in-browser ZIP writer that stores entries uncompressed.
 *
 * This is a modified version of https://github.com/pwasystem/zip/
 *
 * Entries are collected in `this.zip` (plain arrays of byte values carrying
 * extra `modTime` / `fileUrl` properties) and serialised by `buildZip()` /
 * `makeZip()`. Headers are assembled as space-separated hex strings and then
 * converted to bytes with `hex2buf`.
 */
class Zip {
  /**
   * @param {string} name Archive name; used as the download file name
   *   (`<name>.zip`) and as the top-level folder of every entry.
   */
  constructor(name) {
    this.name = name;
    // Null-prototype dictionary so entry names such as "length" or
    // "constructor" cannot collide with built-in properties.
    this.zip = Object.create(null);
    this.file = [];
    this.o = this.makeo();
  }

  /** Number -> binary string left-padded with zeros to `size` digits. */
  dec2bin = (dec, size) => dec.toString(2).padStart(size, '0');

  /** String -> array of its UTF-8 byte values. */
  str2dec = (str) => Array.from(new TextEncoder().encode(str));

  /** String -> array of two-digit hex strings, one per UTF-8 byte. */
  str2hex = (str) =>
    Array.from(new TextEncoder().encode(str), (x) => x.toString(16).padStart(2, '0'));

  /**
   * Space-separated hex bytes -> Uint8Array.
   * A blank string yields an empty buffer (not a single 0 byte).
   */
  hex2buf = (hex) => {
    const trimmed = hex.trim();
    if (trimmed === '') return new Uint8Array(0);
    return new Uint8Array(trimmed.split(/\s+/).map((x) => parseInt(x, 16)));
  };

  /** 16-digit binary string -> two hex bytes, low byte first. */
  bin2hex = (bin) =>
    `${parseInt(bin.slice(8), 2).toString(16).padStart(2, '0')} ${parseInt(bin.slice(0, 8), 2).toString(16).padStart(2, '0')}`;

  /** Hex digit string -> space-separated byte pairs in reversed order. */
  reverse = (hex) => {
    const pairs = [];
    for (let i = 0; i < hex.length; i += 2) pairs.push(hex[i] + '' + hex[i + 1]);
    return pairs.reverse().join(' ');
  };

  /** Non-negative integer -> little-endian hex bytes, `byteCount` wide. */
  toLittleEndianHex = (value, byteCount) =>
    this.reverse(value.toString(16).padStart(byteCount * 2, '0'));

  /** Builds the 256-entry lookup table used by `crc32`. */
  makeo = () => {
    const table = [];
    for (let c = 0; c < 256; c++) {
      let a = c;
      for (let f = 0; f < 8; f++) a = 1 & a ? 3988292384 ^ (a >>> 1) : a >>> 1;
      table[c] = a;
    }
    return table;
  };

  /**
   * CRC-32 of a sequence of byte values.
   * @param {ArrayLike<number>} r Byte values.
   * @returns {string} Four hex bytes, least-significant byte first.
   */
  crc32 = (r) => {
    let n = -1;
    for (let t = 0; t < r.length; t++) n = (n >>> 8) ^ this.o[255 & (n ^ r[t])];
    return this.reverse(((-1 ^ n) >>> 0).toString(16).padStart(8, '0'));
  };

  /**
   * Last path segment of a URL, with any query string or fragment removed.
   * Works whether or not the URL contains a `?`.
   */
  fileNameFromUrl = (fileUrl) => {
    const path = String(fileUrl).split(/[?#]/, 1)[0];
    return path.substring(path.lastIndexOf('/') + 1);
  };

  /**
   * Encodes a timestamp as the four hex bytes (time, then date) used in the
   * headers. Missing or unparsable values fall back to the current time and
   * years are clamped to 1980..2107, the range the 7-bit year field can hold.
   * @param {Date|string|number|null|undefined} value
   * @returns {string} e.g. "9d 18 22 50"
   */
  dosDateTime = (value) => {
    let date = value == null ? new Date() : new Date(value);
    if (Number.isNaN(date.getTime())) date = new Date();
    if (date.getFullYear() < 1980) date = new Date(1980, 0, 1);
    if (date.getFullYear() > 2107) date = new Date(2107, 11, 31, 23, 59, 58);
    const hour = this.dec2bin(date.getHours(), 5);
    const minutes = this.dec2bin(date.getMinutes(), 6);
    // Two-second resolution; floor so that 59 s is not rounded up to 60 s.
    const seconds = this.dec2bin(Math.floor(date.getSeconds() / 2), 5);
    const year = this.dec2bin(date.getFullYear() - 1980, 7);
    const month = this.dec2bin(date.getMonth() + 1, 4);
    const day = this.dec2bin(date.getDate(), 5);
    return `${this.bin2hex(`${hour}${minutes}${seconds}`)} ${this.bin2hex(`${year}${month}${day}`)}`;
  };

  /**
   * Downloads every URL and adds the response bodies as entries.
   * A URL that fails to load is logged and skipped so it cannot block the
   * archive from being produced.
   * @param {Iterable<string>|ArrayLike<string>} filesArray URLs to fetch.
   * @param {string} [folder=''] Optional sub-folder prefix (e.g. "img/").
   * @param {boolean} [download=false] Call `makeZip()` once all fetches settle.
   * @returns {Promise<void>} Resolves when all fetches have settled.
   */
  fetch2zip(filesArray, folder = '', download = false) {
    const tasks = Array.from(filesArray, async (fileUrl) => {
      try {
        const response = await fetch(fileUrl);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const buffer = await response.arrayBuffer();
        const fileName = this.fileNameFromUrl(fileUrl);
        console.log(`File: ${fileName} from ${fileUrl} load`);
        const uint = Array.from(new Uint8Array(buffer));
        uint.name = fileName;
        // May be null when the header is absent; dosDateTime handles that.
        uint.modTime = response.headers.get('Last-Modified');
        uint.fileUrl = `${this.name}/${folder}${fileName}`;
        this.zip[fileName] = uint;
      } catch (err) {
        console.error(`File: ${fileUrl} could not be loaded`, err);
      }
    });
    return Promise.all(tasks).then(() => {
      if (download) this.makeZip();
    });
  }

  /**
   * Adds a text entry, encoded as UTF-8.
   * @param {string} name Entry file name.
   * @param {string} str Entry contents.
   * @param {string} [folder=''] Optional sub-folder prefix.
   */
  str2zip(name, str, folder = '') {
    const uint = this.str2dec(str);
    uint.name = name;
    uint.modTime = new Date();
    uint.fileUrl = `${this.name}/${folder}${name}`;
    this.zip[name] = uint;
  }

  /**
   * Adds objects exposing `arrayBuffer()`, `name` and `lastModified`
   * (the members read here) as entries.
   * @param {Iterable<object>|ArrayLike<object>} files
   * @param {string} [folder=''] Optional sub-folder prefix.
   * @returns {Promise<void[]>} Resolves once every file has been read.
   */
  files2zip(files, folder = '') {
    return Promise.all(
      Array.from(files, async (file) => {
        const uint = Array.from(new Uint8Array(await file.arrayBuffer()));
        uint.name = file.name;
        uint.modTime = file.lastModified;
        uint.fileUrl = `${this.name}/${folder}${file.name}`;
        this.zip[uint.fileUrl] = uint;
      }),
    );
  }

  /**
   * Serialises all collected entries into the archive byte chunks.
   * Rebuilds `this.file` from scratch, so it is safe to call repeatedly.
   * @returns {Uint8Array[]} Local headers and data, then the central
   *   directory, then the end-of-central-directory record.
   */
  buildZip() {
    const parts = [];
    const centralHeaders = [];
    let offset = 0;
    for (const entry of Object.values(this.zip)) {
      const stamp = this.dosDateTime(entry.modTime);
      const crc = this.crc32(entry);
      const size = this.toLittleEndianHex(entry.length, 4);
      const nameHexBytes = this.str2hex(entry.fileUrl);
      const nameFile = nameHexBytes.join(' ');
      const nameSize = this.toLittleEndianHex(nameHexBytes.length, 2);
      const localHeader = this.hex2buf(
        `50 4B 03 04 14 00 00 00 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 ${nameFile}`,
      );
      // The central record points back at where this entry's local header starts.
      centralHeaders.push(
        `50 4B 01 02 14 00 14 00 00 00 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 00 00 00 00 01 00 20 00 00 00 ${this.toLittleEndianHex(offset, 4)} ${nameFile}`,
      );
      parts.push(localHeader, new Uint8Array(entry));
      offset += localHeader.length + entry.length;
    }
    const centralBuffer = this.hex2buf(centralHeaders.join(' '));
    const entries = this.toLittleEndianHex(centralHeaders.length, 2);
    const dirSize = this.toLittleEndianHex(centralBuffer.length, 4);
    const dirInit = this.toLittleEndianHex(offset, 4);
    const endRecord = this.hex2buf(
      `50 4b 05 06 00 00 00 00 ${entries} ${entries} ${dirSize} ${dirInit} 00 00`,
    );
    parts.push(centralBuffer, endRecord);
    this.file = parts;
    return parts;
  }

  /** Builds the archive and triggers a browser download of `<name>.zip`. */
  makeZip() {
    const parts = this.buildZip();
    const a = document.createElement('a');
    a.href = URL.createObjectURL(new Blob(parts, { type: 'application/octet-stream' }));
    console.log(a.href);
    a.download = `${this.name}.zip`;
    a.click();
  }
}
129 +
// Exact text values that getDescription drops from the scraped output.
var filterList = [
  '',
  'Save', 'Share', 'Details',
  'Send', 'Send seller a message',
  'Condition',
  'Location is approximate',
  'Seller information Seller details', 'Seller information', 'Seller details',
  'Message',
];
// Set form of the same values, for constant-time membership checks.
var filterSet = new Set(filterList);
145 +
/**
 * Collects the text of every element matching `selector`, drops values listed
 * in `filterSet`, removes duplicates (first occurrence wins) and joins the
 * remainder with newlines.
 */
var getDescription = (selector) => {
  const kept = new Set();
  for (const node of Array.from(document.querySelectorAll(selector))) {
    const text = node.textContent;
    if (filterSet.has(text)) continue;
    kept.add(text);
  }
  return [...kept].join('\n');
};
149 +
/** Returns the `src` of every element matching `selector`, in document order. */
var getImages = (selector) => {
  const sources = [];
  for (const element of Array.from(document.querySelectorAll(selector))) {
    sources.push(element.src);
  }
  return sources;
};
153 +
// Archive name derived from the page title: spaces become underscores, every
// character outside [_a-zA-Z0-9] is removed, then the first
// "Marketplace__" and "__Facebook" markers are stripped.
var title = document.title
  .trim()
  .replaceAll(' ', '_')
  .replace(/[^_a-zA-Z0-9]/g, '')
  .replace('Marketplace__', '')
  .replace('__Facebook', '');

var z = new Zip(title);
// Might have to update the selector argument to getDescription and getImages!
z.str2zip('description.txt', getDescription('div.xzepove:nth-child(1) > div:nth-child(1) > div:nth-child(1) span'));
// Arguments are positional (folder, download). Writing `folder=''` inside the
// call would be an assignment expression that creates implicit globals and
// throws a ReferenceError in strict mode.
z.fetch2zip(getImages('div.xh8yej3:nth-child(3) img'), '', true);
Новее Позже