Comprehensive Flashscore.com scraper built with Playwright and Cheerio. Collects match data, lineups, statistics and schedules.
const { chromium } = require('playwright');
const StealthPlugin = require('puppeteer-extra-plugin-stealth')();
const UserAgent = require('user-agents');
const ProxyManager = require('../utils/proxy-manager');
/**
 * Shared Playwright plumbing for the scrapers: launches a headless Chromium
 * (optionally through a proxy), opens one context/page, and offers navigation
 * and delay helpers. After `launchBrowser()` resolves, `this.browser`,
 * `this.context` and `this.page` are populated.
 */
class BaseScraper {
  constructor() {
    this.proxyManager = new ProxyManager();
    this.stealthPlugin = StealthPlugin;
    this.userAgent = new UserAgent();
  }

  /**
   * Launches Chromium and prepares `this.page` for scraping.
   *
   * The proxy returned by `proxyManager.getNextProxy()` is used when it is
   * truthy; otherwise the browser is launched with a direct connection instead
   * of handing Playwright a proxy entry without a server.
   *
   * @returns {Promise<void>}
   */
  async launchBrowser() {
    const proxy = this.proxyManager.getNextProxy();
    const launchOptions = {
      headless: true,
      args: [
        '--disable-blink-features=AutomationControlled',
        '--no-sandbox',
      ],
    };
    if (proxy) {
      launchOptions.proxy = { server: proxy };
    }

    this.browser = await chromium.launch(launchOptions);
    this.context = await this.browser.newContext({
      userAgent: this.userAgent.toString(),
      viewport: { width: 1920, height: 1080 },
    });
    this.page = await this.context.newPage();

    // Block unnecessary resources: abort every request whose URL matches the
    // image / stylesheet / ad pattern below.
    await this.page.route(
      /\.(jpg|jpeg|png|gif|css|ads|adservice|googleadservices|doubleclick)/,
      (route) => route.abort(),
    );

    // Enable stealth.
    // NOTE(review): this plugin comes from the Puppeteer ecosystem; confirm
    // that this hook actually has an effect on a Playwright page.
    await this.stealthPlugin.onPageCreated(this.page);
  }

  /**
   * Opens `url` in the current page, waits for the network to go idle
   * (60 s timeout) and then pauses for a random 2-8 seconds.
   *
   * @param {string} url - Address to load.
   * @returns {Promise<void>}
   */
  async navigateTo(url) {
    await this.page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
    await this.randomDelay(2000, 8000);
  }

  /**
   * Waits for a random whole number of milliseconds in `[min, max]`
   * (both ends inclusive) using the current page's timer.
   *
   * @param {number} min - Shortest delay in milliseconds.
   * @param {number} max - Longest delay in milliseconds.
   * @returns {Promise<void>}
   */
  async randomDelay(min, max) {
    const delay = Math.floor(Math.random() * (max - min + 1)) + min;
    await this.page.waitForTimeout(delay);
  }

  /**
   * Closes the browser if one is open. Safe to call when the launch never
   * succeeded or when the browser was already closed, so it can be used
   * unconditionally in cleanup paths.
   *
   * @returns {Promise<void>}
   */
  async closeBrowser() {
    const browser = this.browser;
    if (!browser) {
      return;
    }
    // Drop the references first so a second call is a no-op even if close()
    // itself rejects.
    this.browser = null;
    this.context = null;
    this.page = null;
    await browser.close();
  }
}
module.exports = BaseScraper;
const { chromium } = require('playwright');
// Module-private keys: `singleton` is the slot on the class that caches the
// one instance; `singletonEnforcer` is the token the constructor demands so
// that outside code cannot call `new BrowserManager()` directly.
const singleton = Symbol();
const singletonEnforcer = Symbol();

/**
 * Process-wide holder for a single shared Chromium browser.
 * Obtain it through `BrowserManager.instance`; direct construction throws.
 */
class BrowserManager {
  /**
   * @param {symbol} enforcer - Must be the module-private enforcer token.
   * @throws {Error} When called with anything else.
   */
  constructor(enforcer) {
    if (enforcer !== singletonEnforcer) {
      throw new Error('Cannot construct singleton');
    }
    this.browser = null;
    // In-flight launch, shared by concurrent callers; null when idle.
    this.launchPromise = null;
  }

  /**
   * The shared manager, created on first access.
   *
   * @returns {BrowserManager}
   */
  static get instance() {
    if (!this[singleton]) {
      this[singleton] = new BrowserManager(singletonEnforcer);
    }
    return this[singleton];
  }

  /**
   * Returns the shared browser, launching it when there is none or when the
   * previous one has disconnected. Callers that arrive while a launch is
   * still in progress receive that same launch instead of starting another
   * browser that would never be closed.
   *
   * @returns {Promise<object>} The connected Playwright browser.
   */
  async launch() {
    if (this.browser && this.browser.isConnected()) {
      return this.browser;
    }
    if (!this.launchPromise) {
      // Built as a promise chain so every callback runs after the assignment
      // below; the slot is therefore always cleared once the launch settles,
      // whether it succeeded or failed.
      this.launchPromise = Promise.resolve()
        .then(() => chromium.launch({
          headless: true,
          args: [
            '--disable-blink-features=AutomationControlled',
            '--no-sandbox',
          ],
        }))
        .then((browser) => {
          this.browser = browser;
          return browser;
        })
        .finally(() => {
          this.launchPromise = null;
        });
    }
    return this.launchPromise;
  }

  /**
   * Creates a new browser context on the shared browser with a fixed
   * 1920x1080 viewport and a fixed desktop Chrome user agent.
   *
   * @returns {Promise<object>} The new Playwright browser context.
   */
  async newContext() {
    const browser = await this.launch();
    return browser.newContext({
      viewport: { width: 1920, height: 1080 },
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    });
  }

  /**
   * Closes the shared browser if one is held and forgets it, so the next
   * `launch()` starts a fresh one.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (this.browser) {
      await this.browser.close();
      this.browser = null;
    }
  }
}
module.exports = BrowserManager;
const BaseScraper = require('./base-scraper');
const cheerio = require('cheerio');
const MatchData = require('../models/match-data');
const DataProcessor = require('../utils/data-processor');
/**
 * Scrapes the summary of a single match page and turns it into a `MatchData`
 * record (teams, scores, date/time and the list of match events).
 */
class MatchSummaryScraper extends BaseScraper {
  /**
   * @param {string} matchUrl - Address of the match page to scrape.
   */
  constructor(matchUrl) {
    super();
    this.matchUrl = matchUrl;
    this.matchData = new MatchData();
  }

  /**
   * Launches a browser, loads the match page, waits up to 10 s for the
   * `.matchSummary` element and parses the rendered HTML.
   *
   * The browser is closed exactly once, whether scraping succeeded or not.
   * A failure while closing is logged but never replaces the scraping result
   * or the scraping error.
   *
   * @returns {Promise<MatchData>} The parsed match, also stored on `this.matchData`.
   * @throws Rethrows any error raised while launching, navigating or parsing.
   */
  async scrape() {
    try {
      await this.launchBrowser();
      await this.navigateTo(this.matchUrl);
      await this.page.waitForSelector('.matchSummary', { timeout: 10000 });
      const html = await this.page.content();
      this.matchData = this.parseMatchSummary(html);
      return this.matchData;
    } catch (error) {
      console.error('Error scraping match summary:', error);
      throw error;
    } finally {
      // `this.browser` is unset when the launch itself failed; calling
      // closeBrowser() then would throw and hide the original error.
      if (this.browser) {
        try {
          await this.closeBrowser();
        } catch (closeError) {
          console.error('Error closing browser:', closeError);
        }
      }
    }
  }

  /**
   * Extracts match fields from page HTML.
   *
   * @param {string} html - Full HTML of the match page.
   * @returns {MatchData} A new record with teams, scores, normalized date,
   *   time and normalized events filled in.
   */
  parseMatchSummary(html) {
    const $ = cheerio.load(html);
    const match = new MatchData();

    // Parse teams and scores
    match.homeTeam = $('.home-team-name').text().trim();
    match.awayTeam = $('.away-team-name').text().trim();
    match.score = $('.score').text().trim();
    match.halfTimeScore = $('.half-time-score').text().trim();

    // Parse date and time (read from data attributes, not from element text)
    match.date = $('.match-date').attr('data-date');
    match.time = $('.match-time').attr('data-time');

    // Parse match events
    $('.event-row').each((i, element) => {
      const row = $(element);
      const event = {
        type: row.find('.event-type').text().trim(),
        time: row.find('.event-time').text().trim(),
        player: row.find('.event-player').text().trim(),
        // Any class name containing "home" marks a home-side event;
        // everything else is treated as away.
        team: row.attr('class').includes('home') ? 'home' : 'away',
      };
      match.events.push(event);
    });

    // Data normalization
    match.date = DataProcessor.normalizeDate(match.date);
    match.events = DataProcessor.normalizeEvents(match.events);
    return match;
  }
}
module.exports = MatchSummaryScraper;
/**
 * Plain record describing one match: identity, teams, timing, scores,
 * venue details, the event list and a free-form statistics map.
 */
class MatchData {
  /** Creates an empty record stamped with the current time. */
  constructor() {
    Object.assign(this, {
      id: '',
      homeTeam: '',
      awayTeam: '',
      competition: '',
      status: '',
      date: '',
      time: '',
      score: '',
      halfTimeScore: '',
      venue: '',
      attendance: 0,
      referee: '',
      events: [],
      statistics: {},
      lastUpdated: new Date(),
    });
  }

  /**
   * Appends one event to the match's event list.
   *
   * @param {object} event - Event to record.
   */
  addEvent(event) {
    this.events.push(event);
  }

  /**
   * Stores (or overwrites) a statistic under the given name.
   *
   * @param {string} type - Statistic name.
   * @param {*} value - Statistic value.
   */
  addStatistic(type, value) {
    this.statistics[type] = value;
  }

  /**
   * Builds the serializable view of the record: the known fields only, with
   * `lastUpdated` rendered as an ISO-8601 string.
   *
   * @returns {object} Plain object suitable for `JSON.stringify`.
   */
  toJSON() {
    const copiedFields = [
      'id',
      'homeTeam',
      'awayTeam',
      'competition',
      'status',
      'date',
      'time',
      'score',
      'halfTimeScore',
      'venue',
      'attendance',
      'referee',
      'events',
      'statistics',
    ];
    const snapshot = {};
    for (const field of copiedFields) {
      snapshot[field] = this[field];
    }
    snapshot.lastUpdated = this.lastUpdated.toISOString();
    return snapshot;
  }
}
module.exports = MatchData;
Avoid detection with advanced techniques like randomized user agents, request delays, and proxy rotation.
Regularly collect data using cron scheduling and automated retries with exponential backoff.
Clean separation of concerns with independent modules for scraping, data processing, and utilities.
# Create project directory
mkdir flashscore-scraper
cd flashscore-scraper
# Initialize npm project
npm init -y
# Install dependencies
npm install playwright cheerio axios dotenv fs-extra node-cron puppeteer-extra-plugin-stealth user-agents
Create a .env file with your configuration:
# Proxy configuration
PROXY_SERVERS="http://user:pass@proxy1.com:8080,http://user:pass@proxy2.com:8080"
# Flashscore base URL
BASE_URL="https://www.flashscore.com"
# Schedule - every day at midnight
CRON_SCHEDULE="0 0 * * *"
# Output directory
DATA_DIR="./data"
Execute the main script directly or set up a cron job:
# Run once for a specific match
node src/index.js --matchId "123456"
# Or run on a schedule according to settings.js
node src/index.js --cron