arcology/arcology/agent_utils.py

137 lines
3.8 KiB
Python

# [[file:../scaffolding.org::*User-Agent break-down][User-Agent break-down:1]]
from __future__ import annotations
import logging
from typing import List
from enum import Enum
logger = logging.getLogger(__name__)
class AgentClassification(str, Enum):
    """Coarse category of an HTTP client, derived from its User-Agent header.

    Members are also ``str`` instances and ``str(member)`` yields the bare
    value (e.g. ``"feed"``), so a member can be used wherever its string
    value is expected.
    """

    NO_UA = "no-ua"
    UNKNOWN = "unknown"
    INTERNAL = "internal"
    MATRIX = "matrix"
    APP = "app"
    FEDIVERSE = "fediverse"
    FEED = "feed"
    BOT = "bot"
    AUTOMATION = "automation"
    CRAWLER = "crawler"
    BROWSER = "browser"

    def __str__(self):
        """Return the member's raw value instead of ``ClassName.MEMBER``."""
        return self.value

    @classmethod
    def from_request(cls, request) -> "AgentClassification":
        """Classify *request* by substring-matching its ``User-Agent`` header.

        Args:
            request: Any object exposing a ``headers`` mapping with a
                ``get`` method; only the ``"User-Agent"`` key is read.

        Returns:
            ``NO_UA`` when the header is missing or empty; otherwise the
            classification of the first rule in ``_USER_AGENT_RULES`` with
            a substring present in the header; otherwise ``UNKNOWN``, after
            logging a warning that includes the unrecognised header value.
        """
        user_agent = request.headers.get("User-Agent")
        if not user_agent:
            return cls.NO_UA

        # Rules are evaluated in declaration order and the first hit wins.
        for classification, needles in _USER_AGENT_RULES:
            if any(needle in user_agent for needle in needles):
                return classification

        # Lazy %-formatting: the message is only rendered if it is emitted.
        logger.warning("Unknown User-Agent: %s", user_agent)
        return cls.UNKNOWN


# Ordered (classification, case-sensitive substrings) rules consulted by
# AgentClassification.from_request.  Order is significant: specific products
# must precede generic tokens, e.g. the fediverse servers come before the
# catch-all "Bot"/"bot" rule and the crawlers before the browser engines.
# Defined at module level because any plain attribute assigned inside an Enum
# body would itself become an enum member.
_USER_AGENT_RULES = (
    (AgentClassification.INTERNAL, ("prometheus", "feediverse")),
    (AgentClassification.MATRIX, ("Synapse", "Element")),
    (AgentClassification.APP, ("SubwayTooter", "Dalvik", "Nextcloud-android")),
    (
        AgentClassification.FEDIVERSE,
        (
            "Pleroma",
            "Mastodon/",
            "Akkoma",
            "Friendica",
            "FoundKey",
            "MissKey",
            "CalcKey",
            "gotosocial",
            "Epicyon",
        ),
    ),
    (
        AgentClassification.FEED,
        (
            "feedparser",
            "granary",
            "Tiny Tiny RSS",
            "Go_NEB",
            "Gwene",
            "Feedbin",
            "NetNewsWire",
            "FreshRSS",
            "SimplePie",
            "Elfeed",
            "inoreader",
            "Reeder",
            "Miniflux",
        ),
    ),
    (AgentClassification.BOT, ("Bot", "bot", "Poduptime")),
    (
        AgentClassification.AUTOMATION,
        (
            "aiohttp",
            "python-requests",
            "Go-http-client",
            "curl/",
            # GNU Wget identifies itself as "Wget/<version>"; the lowercase
            # spelling is kept for clients that were already matched by it.
            "Wget/",
            "wget/",
            "keybase-proofs/",
        ),
    ),
    (
        AgentClassification.CRAWLER,
        (
            "InternetMeasurement",
            "CensysInspect",
            "scaninfo@paloaltonetworks.com",
            "SEOlyt/",
            "Sogou web spider/",
        ),
    ),
    (
        AgentClassification.BROWSER,
        (
            "Chrome/",
            "Firefox/",
            "DuckDuckGo/",
            "Safari/",
            "Opera/",
            "ddg_android/",
        ),
    ),
)
# User-Agent break-down:1 ends here