https://pylab.me/blog/post/python_spider_with_tampermonkey_s1

// ==UserScript==
// @name 淘宝item
// @namespace http://tampermonkey.net/
// @version 0.1
// @description try to take over the world!
// @author You
// @match https://sf-item.taobao.com/sf_item/713690244794.htm?spm=a213w.7398504.paiList.16.13154827Z13a5f&track_id=a7eb667b-ee7a-436a-ba80-e9fab9215720
// @icon https://www.google.com/s2/favicons?sz=64&domain=taobao.com
// @require https://cdn.bootcss.com/jquery/2.2.1/jquery.js
// @grant GM_xmlhttpRequest
// ==/UserScript==
(function() {
'use strict';
/**
 * Pause helper: produces a promise that settles once a timer elapses.
 * @param {number} [ms=0] Delay in milliseconds handed to setTimeout.
 * @returns {Promise<void>} Promise resolved when the timer fires.
 */
function sleep(ms = 0) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
// Tab elements collected by doList(); they are clicked one by one before scraping.
const urlList = [];
// Record shared by all get* helpers; each helper fills in the fields it can find.
const obj = {};
/**
 * POST a payload to the local collector endpoint through the userscript
 * request API (GM_xmlhttpRequest).
 * Failures are logged to the console instead of being dropped silently.
 * @param {string} data Request body; getData() passes a JSON string.
 */
function lp_send_data(data) {
    GM_xmlhttpRequest({
        method: "POST",
        url: "http://127.0.0.1:8080/doData",
        headers: {"Content-Type": "application/json;charset=UTF-8"},
        data: data,
        onload: function (response) {
            // onload fires for any completed response, so a non-2xx status
            // still has to be checked explicitly.
            if (response.status < 200 || response.status >= 300) {
                console.error("lp_send_data: server answered with status", response.status);
            }
        },
        onerror: function (response) {
            // Network-level failure, e.g. the local collector is not running.
            console.error("lp_send_data: request failed", response);
        }
    });
}
/**
 * Scrape the current auction item page into the shared `obj` record and
 * send the record, serialized as JSON, to the collector via lp_send_data().
 *
 * The helper calls run in a fixed order; a later call that matches
 * overwrites a field written by an earlier one (getMianJi is run on the
 * item description first, then on the auction announcement).
 */
function getData() {
    // Page title with every whitespace character removed.
    obj.标题 = $('h1').text().replace(/\s+/g, "");
    // Item description section.
    const itemDetailText = $('#J_ItemDetailContent').text();
    // Auction announcement section.
    const noticeText = $('#NoticeDetail').text();
    // Bidding record section.
    const bidRecordText = $('#J_Confirmation').text();

    getMianJi(itemDetailText);
    getMianJi(noticeText);
    getShiJian(noticeText);
    getAddr(itemDetailText);
    getBuyer(bidRecordText);
    getPrice();

    obj.url = window.location.href;
    console.log(obj);
    lp_send_data(JSON.stringify(obj));
}
/**
 * Read the starting price and the appraised price from the price table
 * cells (`#J_HoverShow td`) into the shared `obj` record.
 *
 * A cell is classified by the text of its first `span.pay-mark`; the value
 * is the first child node of the cell's `span.J_Price`. Cells whose label
 * matches but that carry no price node are skipped instead of throwing.
 */
function getPrice() {
    $('#J_HoverShow td').each((idx, ele) => {
        const label = $(ele).find("span.pay-mark").first().text();
        const isStartPrice = label.includes("变卖价") || label.includes("起拍价");
        const isAppraisedPrice = label.includes("评估价");
        if (!isStartPrice && !isAppraisedPrice) {
            return;
        }
        const priceNode = $(ele).find("span.J_Price").contents()[0];
        if (!priceNode) {
            // Labelled cell without a price: nothing to record, keep scanning.
            return;
        }
        if (isStartPrice) {
            obj.起拍价 = priceNode.nodeValue;
        }
        if (isAppraisedPrice) {
            obj.评估价 = priceNode.nodeValue;
        }
    });
}
/**
 * Pull the address out of the item description text.
 * The address is whatever sits between "位置" and "地图", separated from both
 * by whitespace; on a match it is stored in the shared record as `obj.地址`.
 * @param {string} source Text to search.
 */
function getAddr(source) {
    const found = /位置\s+(.*?)\s+地图/g.exec(source);
    if (found === null) {
        return;
    }
    obj.地址 = found[1];
}
/**
 * Extract the winning bidder and the final price from the bidding record text
 * into the shared `obj` record.
 *
 * - `obj.成交人`: the text between "姓名" and the FIRST following "通过竞买号",
 *   trimmed. The capture is lazy so a second "通过竞买号" later on the same
 *   line is not swallowed into the name.
 * - `obj.成交价`: the first "¥"-prefixed amount, with thousands separators
 *   removed, as a number.
 * @param {string} source Text to search.
 */
function getBuyer(source) {
    const buyerMatch = /姓名(.*?)通过竞买号/.exec(source);
    if (buyerMatch) {
        obj.成交人 = buyerMatch[1].trim();
    }
    const priceMatch = source.match(/¥(\d+(,\d{3})*(\.\d+)?)\b/);
    if (priceMatch) {
        obj.成交价 = parseFloat(priceMatch[1].replace(/,/g, ''));
    }
}
/**
 * Auction start time, end time and viewing date.
 *
 * Looks for two dates in `source`: first with an hour part ("…日…时"), and
 * if that finds nothing, as bare dates. The first pattern that matches fills
 * `obj.起拍时间` and `obj.结束时间`. Independently, the date following
 * "看样时间" is stored as `obj.看样时间`.
 * @param {string} source Text to search.
 */
function getShiJian(source) {
    const rangePatterns = [
        /(\d{4}年\d{1,2}月\d{1,2}日.*?\d{1,2}时).*?\s*(\d{4}年\d{1,2}月\d{1,2}日.*?\d{1,2}时)/,
        /(\d{4}年\d{1,2}月\d{1,2}日.*?).*?\s*(\d{4}年\d{1,2}月\d{1,2}日.*?)/,
    ];
    for (const pattern of rangePatterns) {
        const range = source.match(pattern);
        if (range) {
            obj.起拍时间 = range[1];
            obj.结束时间 = range[2];
            break;
        }
    }

    const viewing = source.match(/看样时间.*?(\d{4}年\d{1,2}月\d{1,2}日).*/);
    if (viewing) {
        obj.看样时间 = viewing[1];
    }
}
/**
 * Extract area figures from `source` into the shared `obj` record.
 *
 * For each label the first number after it on the same line is captured as
 * a string (integer or decimal, all fractional digits kept):
 *   "建筑面积"     -> obj.建筑面积
 *   "套内建筑面积" -> obj.套内面积
 *   "土地面积"     -> obj.土地面积
 * Fields whose label is not found are left untouched.
 *
 * NOTE(review): "建筑面积" also matches inside "套内建筑面积", so when the
 * inner-area label appears first its number is stored as 建筑面积 too —
 * confirm whether that fallback is intended.
 * @param {string} source Text to search.
 */
function getMianJi(source) {
    const areaFields = [
        [/建筑面积.*?(\d+(?:\.\d+)?)/, '建筑面积'],
        [/套内建筑面积.*?(\d+(?:\.\d+)?)/, '套内面积'],
        [/土地面积.*?(\d+(?:\.\d+)?)/, '土地面积'],
    ];
    for (const [pattern, field] of areaFields) {
        const found = source.match(pattern);
        if (found) {
            obj[field] = found[1];
        }
    }
}
async function doList(){
$(".tab-menu li").each((idx,item)=>{
urlList.push(item);
});
for (let i = 0; i < urlList.length; i++) {
$(urlList[i]).click();
await sleep(1000);
}
getData();
}
// Run the script only after the page has finished loading.
// A userscript may be injected after the window "load" event has already
// fired, in which case a listener would never run, so start right away when
// the document is already complete.
const runScrape = function () {
    // doList() is async: report failures instead of leaving the promise floating.
    doList().catch(function (err) {
        console.error("scrape failed", err);
    });
};
if (document.readyState === 'complete') {
    runScrape();
} else {
    window.addEventListener('load', runScrape, false);
}
// Your code here...
})();
关键代码段
@require https://cdn.bootcss.com/jquery/2.2.1/jquery.js
@grant GM_xmlhttpRequest
参考