draggor hat die Gist bearbeitet. Zu Änderung gehen
1 file changed, 159 insertions
scrape-marketplace-listing.js (Datei erstellt)
| @@ -0,0 +1,159 @@ | |||
/**
 * Minimal ZIP writer that stores entries uncompressed (method 0) and hands
 * the finished archive to the browser as a download.
 *
 * Entries are collected in `this.zip` (entry key -> array of byte values that
 * also carries `modTime` and `fileUrl` properties) and serialized by
 * `makeZip()`. Headers are assembled as space-separated hex byte strings and
 * converted to bytes with `hex2buf`.
 */
class Zip {
  // This is a modified version of https://github.com/pwasystem/zip/

  /**
   * @param {string} name - Archive base name. It is used as the top-level
   *   folder inside the archive and as the download file name (`<name>.zip`).
   */
  constructor(name) {
    this.name = name;
    // Null-prototype dictionary instead of an Array: entry names such as
    // "length" or "0" must not collide with Array internals.
    this.zip = Object.create(null);
    this.file = [];
    this.o = this.makeo();
  }

  /** Number -> binary string, left-padded with zeros to `size` digits. */
  dec2bin = (dec, size) => dec.toString(2).padStart(size, '0');

  /** String -> array of its UTF-8 byte values. */
  str2dec = (str) => Array.from(new TextEncoder().encode(str));

  /** String -> array of two-digit hex strings, one per UTF-8 byte. */
  str2hex = (str) => [...new TextEncoder().encode(str)].map((x) => x.toString(16).padStart(2, '0'));

  /**
   * Space-separated hex bytes -> Uint8Array. Blank tokens are skipped so an
   * empty string yields an empty buffer rather than a single NaN -> 0 byte.
   */
  hex2buf = (hex) => new Uint8Array(
    hex.split(' ').filter((x) => x !== '').map((x) => Number.parseInt(x, 16)),
  );

  /** 16-digit binary string -> two hex bytes in little-endian order ("lo hi"). */
  bin2hex = (bin) => {
    const low = Number.parseInt(bin.slice(8), 2).toString(16).padStart(2, '0');
    const high = Number.parseInt(bin.slice(0, 8), 2).toString(16).padStart(2, '0');
    return `${low} ${high}`;
  };

  /**
   * Splits a hex string into byte pairs and returns them in reverse order,
   * space separated (big-endian hex digits -> little-endian byte list).
   */
  reverse = (hex) => {
    const pairs = [];
    for (let i = 0; i < hex.length; i += 2) pairs.push(hex.slice(i, i + 2));
    return pairs.reverse().join(' ');
  };

  /** Builds the 256-entry CRC-32 lookup table (reflected polynomial 0xEDB88320). */
  makeo = () => {
    const table = [];
    for (let c = 0; c < 256; c++) {
      let a = c;
      for (let bit = 0; bit < 8; bit++) a = a & 1 ? 0xedb88320 ^ (a >>> 1) : a >>> 1;
      table[c] = a;
    }
    return table;
  };

  /**
   * CRC-32 of a byte sequence.
   * @param {ArrayLike<number>} r - Byte values.
   * @returns {string} The checksum as four space-separated hex bytes, little-endian.
   */
  crc32 = (r) => {
    let n = -1;
    for (let t = 0; t < r.length; t++) n = (n >>> 8) ^ this.o[255 & (n ^ r[t])];
    return this.reverse(((-1 ^ n) >>> 0).toString(16).padStart(8, '0'));
  };

  /**
   * Converts a timestamp into the 4-byte DOS time + date field used by ZIP
   * headers. Unparseable values fall back to the current time and dates
   * before 1980 (the format's epoch) are clamped to 1980-01-01, because a
   * negative year offset would corrupt the bit field.
   */
  #dosDateTime(modTime) {
    let lastMod = new Date(modTime);
    if (Number.isNaN(lastMod.getTime())) lastMod = new Date();
    if (lastMod.getFullYear() < 1980) lastMod = new Date(1980, 0, 1);
    const hour = this.dec2bin(lastMod.getHours(), 5);
    const minutes = this.dec2bin(lastMod.getMinutes(), 6);
    // Two-second resolution; floor keeps the value within 0..29.
    const seconds = this.dec2bin(Math.floor(lastMod.getSeconds() / 2), 5);
    // The year offset is a 7-bit field, so cap it at 127.
    const year = this.dec2bin(Math.min(lastMod.getFullYear() - 1980, 127), 7);
    const month = this.dec2bin(lastMod.getMonth() + 1, 4);
    const day = this.dec2bin(lastMod.getDate(), 5);
    return `${this.bin2hex(`${hour}${minutes}${seconds}`)} ${this.bin2hex(`${year}${month}${day}`)}`;
  }

  /**
   * Fetches every URL and adds the response body as an entry named after the
   * last path segment of the URL (query string and fragment are ignored).
   * A URL that fails to load is logged and skipped so the remaining files
   * are still archived.
   *
   * @param {string[]} filesArray - URLs to fetch.
   * @param {string} [folder=''] - Path prefix inside the archive (include the trailing '/').
   * @param {boolean} [download=false] - Call `makeZip()` once every fetch has settled.
   * @returns {Promise<void>} Settles after all fetches (and the optional download) are done.
   */
  async fetch2zip(filesArray, folder = '', download = false) {
    const tasks = filesArray.map(async (fileUrl, index) => {
      try {
        const response = await fetch(fileUrl);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const buffer = await response.arrayBuffer();
        // Drop query/fragment first: indexOf('?') is -1 for plain URLs, which
        // previously turned the name into the URL prefix.
        const path = String(fileUrl).split(/[?#]/, 1)[0];
        const fileName = path.substring(path.lastIndexOf('/') + 1) || `file${index}`;
        console.log(`File: ${fileName} from ${fileUrl} load`);
        const uint = [...new Uint8Array(buffer)];
        uint.modTime = response.headers.get('Last-Modified') ?? new Date();
        uint.fileUrl = `${this.name}/${folder}${fileName}`;
        this.zip[fileName] = uint;
      } catch (err) {
        console.error(`File: ${fileUrl} failed to load`, err);
      }
    });
    await Promise.all(tasks);
    if (download) {
      this.makeZip();
    }
  }

  /**
   * Adds a text entry encoded as UTF-8, stamped with the current time.
   * @param {string} name - Entry file name (also the key in `this.zip`).
   * @param {string} str - Entry content.
   * @param {string} [folder=''] - Path prefix inside the archive (include the trailing '/').
   */
  str2zip(name, str, folder = '') {
    const uint = this.str2dec(str);
    uint.name = name;
    uint.modTime = new Date();
    uint.fileUrl = `${this.name}/${folder}${name}`;
    this.zip[name] = uint;
  }

  /**
   * Adds File/Blob-like objects (anything exposing `arrayBuffer()`, `name`
   * and `lastModified`), keyed by their path inside the archive.
   * @param {ArrayLike<File>} files - Files to add.
   * @param {string} [folder=''] - Path prefix inside the archive (include the trailing '/').
   * @returns {Promise<void[]>} Resolves once every file has been read.
   */
  files2zip(files, folder = '') {
    return Promise.all(Array.from(files, async (file) => {
      const data = await file.arrayBuffer();
      const uint = [...new Uint8Array(data)];
      uint.name = file.name;
      uint.modTime = file.lastModified;
      uint.fileUrl = `${this.name}/${folder}${file.name}`;
      this.zip[uint.fileUrl] = uint;
    }));
  }

  /**
   * Serializes all collected entries into `this.file` (local headers + data,
   * central directory, end-of-central-directory record) and triggers a
   * browser download of `<name>.zip` through a temporary anchor element.
   */
  makeZip() {
    // Rebuild from scratch so a repeated call does not append to the
    // buffers of a previous archive.
    this.file = [];
    let count = 0;
    let centralDirectoryFileHeader = '';
    let directoryInit = 0;
    let offSetLocalHeader = '00 00 00 00';
    for (const name of Object.keys(this.zip)) {
      const entry = this.zip[name];
      const stamp = this.#dosDateTime(entry.modTime);
      const crc = this.crc32(entry);
      const size = this.reverse(entry.length.toString(16).padStart(8, '0'));
      const nameHex = this.str2hex(entry.fileUrl);
      const nameFile = nameHex.join(' ');
      const nameSize = this.reverse(nameHex.length.toString(16).padStart(4, '0'));
      // Local file header: signature, version 2.0, no flags, method 0 (stored).
      const fileHeader = `50 4B 03 04 14 00 00 00 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 ${nameFile}`;
      const fileHeaderBuffer = this.hex2buf(fileHeader);
      directoryInit = directoryInit + fileHeaderBuffer.length + entry.length;
      centralDirectoryFileHeader = `${centralDirectoryFileHeader}50 4B 01 02 14 00 14 00 00 00 00 00 ${stamp} ${crc} ${size} ${size} ${nameSize} 00 00 00 00 00 00 01 00 20 00 00 00 ${offSetLocalHeader} ${nameFile} `;
      // The next entry's local header starts where this entry's data ends.
      offSetLocalHeader = this.reverse(directoryInit.toString(16).padStart(8, '0'));
      this.file.push(fileHeaderBuffer, new Uint8Array(entry));
      count++;
    }
    const centralDirectoryBuffer = this.hex2buf(centralDirectoryFileHeader);
    const entries = this.reverse(count.toString(16).padStart(4, '0'));
    // Measure the real byte length so an empty archive reports size 0.
    const dirSize = this.reverse(centralDirectoryBuffer.length.toString(16).padStart(8, '0'));
    const dirInit = this.reverse(directoryInit.toString(16).padStart(8, '0'));
    const centralDirectory = `50 4b 05 06 00 00 00 00 ${entries} ${entries} ${dirSize} ${dirInit} 00 00`;

    this.file.push(centralDirectoryBuffer, this.hex2buf(centralDirectory));

    const a = document.createElement('a');
    a.href = URL.createObjectURL(new Blob([...this.file], { type: 'application/octet-stream' }));
    console.log(a.href);
    a.download = `${this.name}.zip`;
    a.click();
  }
}
| 129 | + | ||
// Text fragments that getDescription() drops when they make up the whole
// text of a matched element.
var filterList = [
  '',
  'Save',
  'Share',
  'Details',
  'Send',
  'Send seller a message',
  'Condition',
  'Location is approximate',
  'Seller information Seller details',
  'Seller information',
  'Seller details',
  'Message',
];

// Set form of the same labels for constant-time membership checks.
var filterSet = new Set();
for (const label of filterList) {
  filterSet.add(label);
}
| 145 | + | ||
/**
 * Collects the text of every element matching `selector`, skipping texts
 * listed in `filterSet` and repeated texts (the first occurrence wins).
 * @param {string} selector - CSS selector passed to document.querySelectorAll.
 * @returns {string} The remaining texts joined with newlines.
 */
var getDescription = (selector) => {
  const uniqueTexts = new Set();
  for (const node of document.querySelectorAll(selector)) {
    const text = node.textContent;
    if (!filterSet.has(text)) {
      uniqueTexts.add(text);
    }
  }
  return [...uniqueTexts].join('\n');
};
| 149 | + | ||
/**
 * Returns the `src` of every element matching `selector`, in document order.
 * @param {string} selector - CSS selector passed to document.querySelectorAll.
 * @returns {Array} One `src` value per matched element.
 */
var getImages = (selector) => {
  const sources = [];
  for (const element of document.querySelectorAll(selector)) {
    sources.push(element.src);
  }
  return sources;
};
| 153 | + | ||
// Archive name derived from the page title: spaces become underscores, every
// other character outside [_a-zA-Z0-9] is removed, then the 'Marketplace__'
// and '__Facebook' fragments are stripped.
var title = document.title.trim().replaceAll(' ', '_').replace(/[^_a-zA-Z0-9]/g, '').replace('Marketplace__', '').replace('__Facebook', '');

var z = new Zip(title);
// Might have to update the selector argument to getDescription and getImages!
z.str2zip('description.txt', getDescription('div.xzepove:nth-child(1) > div:nth-child(1) > div:nth-child(1) span'));
// Arguments are positional (folder, download). Writing `folder=''` or
// `download=true` inside the call would be an assignment expression that
// creates implicit globals and throws a ReferenceError in strict/module code.
z.fetch2zip(getImages('div.xh8yej3:nth-child(3) img'), '', true);
Neuer
Älter