Node.js 爬取喜欢的背景图片

码农天地 - 2020-12-25 22:12:12

直接从幕布拷过来的，懒得改排版了，幕布地址： https://mubu.com/doc/_77_RCP9GG

前言

公司搬家了，换了新的电脑，感觉桌面有点空荡荡，想去搞几张高级的背景图片来装下13，于是便有了这个项目。经过一番谷歌，感觉比较喜欢 wallhaven 这种风格的背景图，开始搞起 ~~~

搞搞搞

思路

解析页面，分析页面结构，拿到图片地址，然后发起请求，之后写入本地

具体操作

解析页面，分析页面结构，然后拿到图片地址，

缩略图的页面结构

详情图的页面结构

缩略图地址： https://th.wallhaven.cc/small/rd/rd7dlq.jpg

大图地址： https://w.wallhaven.cc/full/rd/wallhaven-rd7dlq.jpg

找规律

通过知道小图地址，找出大图地址，不同网站不同哦

找分页接口

知道网站的布局和规律之后，我们就可以开始编码了

编码思路：

先把所有的缩略图都爬取出来，爬完之后，再按照每次10个，10秒一次去下载，然后写入本地

具体步骤

使用 Express 应用程序生成器来构建一个 Express 应用，具体步骤参考官网

需要使用的库

consola：让日志好看点，方便观赏

cheerio：node 版本的 jQuery，用来解析页面

axios：发送请求，下载图片

核心代码

发起请求，获取到缩略图的地址，然后存到一个数组里面

轮询，调用分页接口获取缩略图地址

下载图片，然后存储到本地电脑（楼主使用万年 Windows，穷）

效果，美滋滋

核心代码

// Base URL of the listing page to crawl; the page number is appended to it
const reptileUrl = 'https://wallhaven.cc/toplist?page='
// Local directory where downloaded images are saved
const saveDir = 'E:/myExpressDownload/toplist3/'
// Number of the listing page that will be fetched next
let currentPage = 1
// Last listing page to crawl; collection stops once currentPage exceeds it
const maxPage = 2

// Image records ({ smallUrl, fullUrl, fileName }) collected and waiting to be downloaded
const imgUrlList = []


// Start the crawl; this call must stay below the declarations above because init reads them immediately
init()

/**
 * Crawler entry point.
 *
 * Each call performs one step. While `currentPage` is within `maxPage` it
 * fetches that listing page, turns every thumbnail found into a record in
 * `imgUrlList`, and passes itself to `sleep` so it is called again later.
 * Once `currentPage` exceeds `maxPage` it starts the batched download loop.
 *
 * Fetch/parse errors are logged, not thrown.
 *
 * @returns {Promise<boolean|undefined>} Resolves to `false` when the download
 *     loop has been started, otherwise to `undefined`.
 */
async function init() {
    log.info('主程序开始启动，请您耐心等待~')
    log.info(`开始爬取${reptileUrl}的图片`)
    log.info(`文件将会被保存到以下地址中：${saveDir}`)

    // Make sure the local target directory exists
    if (!fs.existsSync(saveDir)) {
        log.info('目标文件不存在，开始创建新的文件夹~')
        // recursive: saveDir is a nested path, so missing parent folders must be created too
        fs.mkdirSync(saveDir, { recursive: true })
    }

    if (currentPage > maxPage) {
        log.info('超出最大页数，停止收集源数据，开始爬取图片~')
        startDownloadLoop()
        return false
    }

    try {
        log.info(`开始爬取第${currentPage}页`)
        const html = await axiosRequest.get(`${reptileUrl}${currentPage}`)
        const $ = cheerio.load(html)
        $('.thumb img').each((i, v) => {
            const record = toImageRecord(v.attribs['data-src'] || v.attribs.src)
            if (record) {
                imgUrlList.push(record)
            }
        })

        log.info(`imgUrlList==>`, imgUrlList)
        sleep(init)
    } catch (error) {
        log.error(`爬取错误，错误信息如下==>`, error)
    }
}

/**
 * Builds the download record for one thumbnail URL, e.g.
 * https://th.wallhaven.cc/small/rd/rd7dlq.jpg becomes
 * https://w.wallhaven.cc/full/rd/wallhaven-rd7dlq.jpg.
 *
 * @param {string|undefined} smallUrl Thumbnail URL read from the <img> element.
 * @returns {{smallUrl: string, fullUrl: string, fileName: string}|null}
 *     The record, or `null` when no usable URL / file name is available.
 */
function toImageRecord(smallUrl) {
    if (!smallUrl) {
        return null
    }
    const fileName = smallUrl.split('/').pop()
    if (!fileName) {
        return null
    }
    return {
        smallUrl,
        // The full-size URL uses the first two characters of the file name as its folder
        fullUrl: `https://w.wallhaven.cc/full/${fileName.substring(0, 2)}/wallhaven-${fileName}`,
        fileName
    }
}

/**
 * Every 10 seconds removes up to 10 records from `imgUrlList` and hands them
 * to `downloadPicture`; clears its own timer once the list is empty.
 */
function startDownloadLoop() {
    const downloadTimer = setInterval(async () => {
        if (imgUrlList.length === 0) {
            log.info('没有更多了，收工了~~~~')
            clearInterval(downloadTimer)
            return
        }
        log.success('开始轮询下载图片~')
        // Errors must be caught here: a try/catch around setInterval itself
        // never sees failures that happen later inside the callback.
        try {
            await Promise.all(downloadPicture(imgUrlList.splice(0, 10)))
        } catch (error) {
            log.error(`downloadPicture fail===>`, error)
        }
    }, 10000)
}

/**
 * Advances `currentPage`, then pauses before invoking `callback`.
 *
 * The pause length comes from `commonUtils.getRandomNumber(1, 10)` and is
 * treated as seconds. While waiting, a countdown line is logged once per
 * second.
 *
 * @param {Function} [callback] Invoked once after the pause; anything that is
 *     not a function is ignored.
 */
function sleep(callback) {
    currentPage++

    const pauseSeconds = commonUtils.getRandomNumber(1, 10)
    log.info(`爬太多了，有点累了，休息${pauseSeconds}秒，后再继续?`)

    // Countdown: stop the ticker on its last round, but still print that round's value
    let secondsLeft = pauseSeconds
    const countdown = setInterval(function tick() {
        const isLastTick = secondsLeft <= 1
        if (isLastTick) {
            clearInterval(countdown)
        }
        secondsLeft -= 1
        log.success(`倒计时  ${secondsLeft}   秒后开始继续干活~`)
    }, 1000)

    // Resume the caller's work when the pause is over
    setTimeout(function resume() {
        if (typeof callback === 'function') {
            callback()
        }
    }, pauseSeconds * 1000)
}

/**
 * Issues one `axiosRequest.get` call per entry of `aTagArray`, all started
 * together, and waits for every one of them.
 *
 * @param {Array} aTagArray Values passed one by one to `axiosRequest.get`
 *     (presumably page URLs — verify against the caller).
 * @returns {Promise<Array>} Resolves with the results in input order; rejects
 *     if any single request rejects.
 */
async function findPicture(aTagArray) {
    log.info('开始解析图片内容~')

    const pendingRequests = aTagArray.map(function requestOne(target) {
        return axiosRequest.get(target)
    })
    const responses = await Promise.all(pendingRequests)
    return responses
}


/**
 * Downloads a batch of pictures into `saveDir`, all requests started together.
 *
 * Success is logged only after the file has been completely written. A failed
 * request or stream error is logged for that picture and does not reject, so
 * one bad picture cannot abort the rest of the batch or crash the process.
 *
 * @param {Array<{fullUrl: string, fileName: string}>} pictureArray Records to download.
 * @returns {Promise<void>[]} One promise per record, in input order; each
 *     fulfils once its picture has been saved or its failure has been logged.
 */
function downloadPicture(pictureArray) {
    log.info('开始发送请求下载图片~')
    return pictureArray.map(({ fullUrl, fileName }) => {
        const targetPath = `${saveDir}${fileName}`
        log.info(`开始下载图片==>${fullUrl}`)
        return axiosRequest.get(fullUrl, {
            responseType: 'stream'
        }).then(res => new Promise((resolve, reject) => {
            // NOTE(review): assumes axiosRequest.get resolves to the readable
            // stream itself (the original piped `res` directly) — confirm against the wrapper.
            const writer = fs.createWriteStream(targetPath)
            res.on('error', error => {
                writer.destroy()
                reject(error)
            })
            writer.on('error', reject)
            // 'finish' fires only after all data has been flushed to the file
            writer.on('finish', resolve)
            res.pipe(writer)
        })).then(() => {
            log.success(`成功保存图片到本地，保存位置==>${targetPath}`)
        }).catch(error => {
            log.error(`downloadPicture fail===>`, error)
        })
    })
}

完整代码

https://github.com/qinyuanqib...

结束语

本文如有错误，欢迎指正，非常感谢。觉得有用的老铁，点个双击，小红心走一波。欢迎灌水，来呀，互相伤害哈 o(∩_∩)o 哈哈

特别申明：本文内容来源网络，版权归原作者所有，如有侵权请立即与我们联系（cy198701067573@163.com），我们将及时处理。

上一篇：【electron+vue3+ts实战便笺exe】二、electron+vue3开发内容

下一篇：项目模板管理脚手架ptm-cli开发

Tags 标签

加个好友，技术交流

码农天地

首页 HTML/CSS WEB服务器 PHP Linux 数据库异常报错插件工具美文欣赏站长直荐