137 lines
3.8 KiB
Python
137 lines
3.8 KiB
Python
# [[file:../scaffolding.org::*User-Agent break-down][User-Agent break-down:1]]
|
|
from __future__ import annotations
|
|
import logging
|
|
from typing import List
|
|
from enum import Enum
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class AgentClassification(str, Enum):
    """Coarse category of an HTTP client, derived from its ``User-Agent`` header.

    Members mix in ``str``, so each one compares equal to its plain string
    value, and ``str(member)`` returns that value.
    """

    NO_UA = "no-ua"
    UNKNOWN = "unknown"
    INTERNAL = "internal"
    MATRIX = "matrix"
    APP = "app"
    FEDIVERSE = "fediverse"
    FEED = "feed"
    BOT = "bot"
    AUTOMATION = "automation"
    CRAWLER = "crawler"
    BROWSER = "browser"

    def __str__(self):
        """Return the member's string value instead of ``ClassName.MEMBER``."""
        return self.value

    @classmethod
    def from_request(cls, request) -> "AgentClassification":
        """Classify *request* by the contents of its ``User-Agent`` header.

        Args:
            request: Any object exposing ``headers.get("User-Agent")``.

        Returns:
            ``NO_UA`` when the header is missing or empty. Otherwise the
            category of the first rule in ``_USER_AGENT_RULES`` that has a
            marker occurring (case-sensitively) in the header. ``UNKNOWN``
            when no marker matches; that case is also logged as a warning.
        """
        user_agent = request.headers.get("User-Agent")
        if not user_agent:
            return cls.NO_UA

        for classification, markers in _USER_AGENT_RULES:
            if any(marker in user_agent for marker in markers):
                return classification

        # Logger.warn is a deprecated alias of Logger.warning; the %-style
        # arguments defer string formatting to the logging framework.
        logger.warning("Unknown User-Agent: %s", user_agent)
        return cls.UNKNOWN


# Ordered (classification, markers) rules consulted by
# AgentClassification.from_request. Matching is a case-sensitive substring
# test and the FIRST rule with a matching marker wins, so rule order is
# significant: e.g. a Mastodon agent whose string also contains "Bot" is
# reported as FEDIVERSE because that rule precedes BOT.
# The table lives outside the class body because a plain attribute assigned
# inside an Enum body would itself be turned into an enum member.
_USER_AGENT_RULES = (
    (AgentClassification.INTERNAL, ("prometheus", "feediverse")),
    (AgentClassification.MATRIX, ("Synapse", "Element")),
    (AgentClassification.APP, ("SubwayTooter", "Dalvik", "Nextcloud-android")),
    (
        AgentClassification.FEDIVERSE,
        (
            "Pleroma",
            "Mastodon/",
            "Akkoma",
            "Friendica",
            "FoundKey",
            "MissKey",
            "CalcKey",
            "gotosocial",
            "Epicyon",
        ),
    ),
    (
        AgentClassification.FEED,
        (
            "feedparser",
            "granary",
            "Tiny Tiny RSS",
            "Go_NEB",
            "Gwene",
            "Feedbin",
            "NetNewsWire",
            "FreshRSS",
            "SimplePie",
            "Elfeed",
            "inoreader",
            "Reeder",
            "Miniflux",
        ),
    ),
    (AgentClassification.BOT, ("Bot", "bot", "Poduptime")),
    (
        AgentClassification.AUTOMATION,
        (
            "aiohttp",
            "python-requests",
            "Go-http-client",
            "curl/",
            "wget/",
            # GNU Wget identifies itself as "Wget/<version>" (capital W),
            # which the lowercase marker above never matches.
            "Wget/",
            "keybase-proofs/",
        ),
    ),
    (
        AgentClassification.CRAWLER,
        (
            "InternetMeasurement",
            "CensysInspect",
            "scaninfo@paloaltonetworks.com",
            "SEOlyt/",
            "Sogou web spider/",
        ),
    ),
    (
        AgentClassification.BROWSER,
        (
            "Chrome/",
            "Firefox/",
            "DuckDuckGo/",
            "Safari/",
            "Opera/",
            "ddg_android/",
        ),
    ),
)
|
|
# User-Agent break-down:1 ends here
|