JavaScript
JSDom
js
import axios from 'axios';
import { JSDOM, VirtualConsole } from 'jsdom';
import jschardet from 'jschardet';
import iconv from 'iconv-lite';
const url = 'https://www.example.com';
const response = await axios.get(url, {
responseType: 'arraybuffer',
});
// 编码检测
const encoding = jschardet.detect(response.data).encoding;
// 编码转换
const html = iconv.decode(response.data, encoding);
// 创建虚拟窗口
// 屏蔽控制台输出
const virtualConsole = new VirtualConsole();
virtualConsole.on('error', () => {});
virtualConsole.on('warn', () => {});
virtualConsole.on('info', () => {});
virtualConsole.on('dir', () => {});
virtualConsole.on('jsdomError', () => {});
const { window } = new JSDOM(html, {
runScripts: 'dangerously',
virtualConsole,
});
const document = window.document;
const result = document.querySelector('h1').textContent;
// NodeList
const results = document.querySelectorAll('h1');
const text = Array.from(results).map(result => result.textContent);
const text2 = [...results].map(result => result.textContent);
const text3 = results.forEach(result => result.textContent);Cheerio
js
import axios from 'axios';
import * as cheerio from 'cheerio';
import jschardet from 'jschardet';
import iconv from 'iconv-lite';
const url = 'https://www.example.com';
const response = await axios.get(url, {
responseType: 'arraybuffer',
});
const encoding = jschardet.detect(response.data).encoding;
const html = iconv.decode(response.data, encoding);
const $ = cheerio.load(html);
const result = $('h1').text();
// cheerio返回的是cheerio对象,不是NodeList
const results = $('h1');
const imgs = $('img');
const imgSrcs = imgs.map((i, img) => $(img).attr('src')).get();
const link = $('a');
const links = $('a')
.map((i, a) => $(a).attr('href'))
.get();