mirror of
https://github.com/Benexl/FastAnime.git
synced 2026-04-28 11:53:08 -07:00
Delete viu_media/libs/provider directory
This commit is contained in:
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from .....core.constants import GRAPHQL_DIR
|
|
||||||
|
|
||||||
# Streaming servers the AllAnime provider knows how to hand back to callers.
SERVERS_AVAILABLE = [
    "sharepoint",
    "dropbox",
    "gogoanime",
    "weTransfer",
    "wixmp",
    "Yt",
    "mp4-upload",
]

# --- API endpoints / headers ---------------------------------------------
API_BASE_URL = "allanime.day"
API_GRAPHQL_REFERER = "https://allanime.to/"
API_GRAPHQL_ENDPOINT = f"https://api.{API_BASE_URL}/api/"
# fix: was `API_GRAPHQL_HEADERS= {` (missing space around `=`, PEP 8)
API_GRAPHQL_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Content-Type": "application/json",
    "Origin": f"{API_GRAPHQL_REFERER}",
}

# search constants (defaults for the search GraphQL variables)
DEFAULT_COUNTRY_OF_ORIGIN = "all"
DEFAULT_NSFW = True
DEFAULT_UNKNOWN = True
DEFAULT_PER_PAGE = 40
DEFAULT_PAGE = 1

# regex stuff: pulls the direct mp4 url out of an embed page's player config
MP4_SERVER_JUICY_STREAM_REGEX = re.compile(
    r"video/mp4\",src:\"(https?://.*/video\.mp4)\""
)

# graphql files shipped with the package
_GQL_QUERIES = GRAPHQL_DIR / "allanime" / "queries"
SEARCH_GQL = _GQL_QUERIES / "search.gql"
ANIME_GQL = _GQL_QUERIES / "anime.gql"
EPISODE_GQL = _GQL_QUERIES / "episodes.gql"
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
from .extractor import extract_server
|
|
||||||
|
|
||||||
__all__ = ["extract_server"]
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class AkExtractor(BaseExtractor):
    # Extractor for the "Ak" source. Note: "Ak" is listed in OTHER_SOURCES,
    # so extract_server currently skips it.
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Fetch the "clock.json" twin of *url* and wrap its links in a Server."""
        # The embed url's "clock" path segment has a json counterpart.
        response = client.get(
            f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}",
            timeout=10,
        )
        response.raise_for_status()
        streams = response.json()

        # NOTE(review): sibling extractors (Smp4, Lufmp4) read stream["link"]
        # from each entry of streams["links"]; here the raw entry is passed as
        # the link -- confirm the Ak payload really is a list of plain urls.
        return Server(
            name="Ak",
            links=[
                EpisodeStream(link=link, quality="1080") for link in streams["links"]
            ],
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
from abc import ABC, abstractmethod
|
|
||||||
|
|
||||||
from httpx import Client
|
|
||||||
|
|
||||||
from ...types import Server
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
|
|
||||||
|
|
||||||
class BaseExtractor(ABC):
    """Interface every AllAnime server extractor implements."""

    @classmethod
    @abstractmethod
    def extract(
        cls,
        url: str,
        client: Client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server | None:
        """Resolve *url* (already decrypted by extract_server) into a Server.

        client is the shared httpx Client for any follow-up requests; episode
        and source are the raw API mappings. Implementations may return None
        when extraction fails.
        """
        pass
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class SakExtractor(BaseExtractor):
    # "Sak" sources resolve to the dropbox server (see SERVERS_AVAILABLE).
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Fetch the "clock.json" twin of *url* and wrap its links in a Server."""
        response = client.get(
            f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}",
            timeout=10,
        )
        response.raise_for_status()
        streams = response.json()

        # NOTE(review): sibling extractors (Smp4, Lufmp4) read stream["link"]
        # from each entry of streams["links"]; here the raw entry is passed as
        # the link -- confirm the Sak payload really is a list of plain urls.
        return Server(
            name="dropbox",
            links=[
                EpisodeStream(link=link, quality="1080") for link in streams["links"]
            ],
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,65 +0,0 @@
|
|||||||
from httpx import Client
|
|
||||||
|
|
||||||
from ...types import Server
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from ..utils import debug_extractor, logger, one_digit_symmetric_xor
|
|
||||||
from .ak import AkExtractor
|
|
||||||
from .dropbox import SakExtractor
|
|
||||||
from .filemoon import FmHlsExtractor, OkExtractor
|
|
||||||
from .gogoanime import Lufmp4Extractor
|
|
||||||
from .mp4_upload import Mp4Extractor
|
|
||||||
from .sharepoint import Smp4Extractor
|
|
||||||
from .streamsb import SsHlsExtractor
|
|
||||||
from .vid_mp4 import VidMp4Extractor
|
|
||||||
from .we_transfer import KirExtractor
|
|
||||||
from .wixmp import DefaultExtractor
|
|
||||||
from .yt_mp4 import YtExtractor
|
|
||||||
|
|
||||||
# Sources with working extractors, keyed by the API's sourceName string.
AVAILABLE_SOURCES = {
    "Sak": SakExtractor,
    "S-mp4": Smp4Extractor,
    # The AllAnimeSource Literal in types.py spells this source "Luf-mp4",
    # which the old "Luf-Mp4"-only key could never match. Keep both spellings
    # so neither casing is dropped (backward compatible).
    "Luf-mp4": Lufmp4Extractor,
    "Luf-Mp4": Lufmp4Extractor,
    "Default": DefaultExtractor,
    "Yt-mp4": YtExtractor,
    "Kir": KirExtractor,
    "Mp4": Mp4Extractor,
}
# Recognised sources whose extractors are stubs; extract_server skips these.
OTHER_SOURCES = {
    "Ak": AkExtractor,
    "Vid-mp4": VidMp4Extractor,
    "Ok": OkExtractor,
    "Ss-Hls": SsHlsExtractor,
    "Fm-Hls": FmHlsExtractor,
}
|
|
||||||
|
|
||||||
|
|
||||||
@debug_extractor
def extract_server(
    client: Client,
    episode_number: str,
    episode: AllAnimeEpisode,
    source: AllAnimeSource,
) -> Server | None:
    """Resolve one AllAnime source entry into a playable Server.

    Returns None when the source has no url, is a known-but-ignored source
    (OTHER_SOURCES), or is entirely unrecognised.
    """
    url = source.get("sourceUrl")
    if not url:
        logger.debug(f"Url not found in source: {source}")
        return None

    if url.startswith("--"):
        # fix: this debug line used to be emitted unconditionally and *after*
        # the decryption it announces; it now only fires when decrypting.
        logger.debug(f"Decrypting url for source: {source['sourceName']}")
        # "--"-prefixed urls are hex strings xor'ed with the one-byte key 56.
        url = one_digit_symmetric_xor(56, url[2:])

    if source["sourceName"] in OTHER_SOURCES:
        logger.debug(f"Found {source['sourceName']} but ignoring")
        return None

    if source["sourceName"] not in AVAILABLE_SOURCES:
        logger.debug(
            f"Found {source['sourceName']} but did not expect it, its time to scrape lol"
        )
        return None
    logger.debug(f"Found {source['sourceName']}")

    return AVAILABLE_SOURCES[source["sourceName"]].extract(
        url, client, episode_number, episode, source
    )
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL, MP4_SERVER_JUICY_STREAM_REGEX
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: requires decoding obsfucated js (filemoon)
class FmHlsExtractor(BaseExtractor):
    # Stub extractor for the "Fm-Hls" source; listed in OTHER_SOURCES, so
    # extract_server currently skips it.
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Scrape the embed page's "clock.json" twin for a direct mp4 stream.

        Raises if no stream url can be found in the embed html.
        """
        response = client.get(
            f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}",
            timeout=10,
        )
        response.raise_for_status()

        # Strip whitespace so the regex matches regardless of formatting.
        embed_html = response.text.replace(" ", "").replace("\n", "")
        vid = MP4_SERVER_JUICY_STREAM_REGEX.search(embed_html)
        if not vid:
            # fix: was `raise Exception("")` -- an empty message makes the
            # failure impossible to diagnose from logs.
            raise Exception("FmHlsExtractor: no mp4 stream url found in embed page")
        # NOTE(review): name "dropbox" and the mp4upload Referer look
        # copy-pasted from other extractors -- confirm the intended values.
        return Server(
            name="dropbox",
            links=[EpisodeStream(link=vid.group(1), quality="1080")],
            episode_title=episode["notes"],
            headers={"Referer": "https://www.mp4upload.com/"},
        )
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: requires decoding obsfucated js (filemoon)
class OkExtractor(BaseExtractor):
    # Stub extractor for the "Ok" source; listed in OTHER_SOURCES, so
    # extract_server currently skips it.
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Scrape the embed page's "clock.json" twin for a direct mp4 stream.

        Raises if no stream url can be found in the embed html.
        """
        response = client.get(
            f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}",
            timeout=10,
        )
        response.raise_for_status()

        # Strip whitespace so the regex matches regardless of formatting.
        embed_html = response.text.replace(" ", "").replace("\n", "")
        vid = MP4_SERVER_JUICY_STREAM_REGEX.search(embed_html)
        if not vid:
            # fix: was `raise Exception("")` -- an empty message makes the
            # failure impossible to diagnose from logs.
            raise Exception("OkExtractor: no mp4 stream url found in embed page")
        # NOTE(review): name "dropbox" and the mp4upload Referer look
        # copy-pasted from other extractors -- confirm the intended values.
        return Server(
            name="dropbox",
            links=[EpisodeStream(link=vid.group(1), quality="1080")],
            episode_title=episode["notes"],
            headers={"Referer": "https://www.mp4upload.com/"},
        )
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeEpisodeStreams, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class Lufmp4Extractor(BaseExtractor):
    """Resolves "Luf-Mp4" sources to the gogoanime server."""

    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Fetch the "clock.json" twin of *url* and wrap every reported stream."""
        json_url = f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}"
        response = client.get(json_url, timeout=10)
        response.raise_for_status()
        streams: AllAnimeEpisodeStreams = response.json()

        episode_links = []
        for stream in streams["links"]:
            episode_links.append(
                EpisodeStream(
                    link=stream["link"], quality="1080", format=stream["resolutionStr"]
                )
            )

        return Server(
            name="gogoanime",
            links=episode_links,
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import MP4_SERVER_JUICY_STREAM_REGEX
|
|
||||||
from ..utils import logger
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class Mp4Extractor(BaseExtractor):
    # Extractor for the "Mp4" source (mp4upload embeds).
    @classmethod
    def extract(cls, url, client, episode_number, episode, source):
        """Scrape the mp4upload embed page for the direct mp4 stream.

        Returns a Server on success, or None when no stream url can be found
        (deleted file, or unrecognised embed markup).
        """
        # The embed url is fetched directly; mp4upload redirects, hence
        # follow_redirects=True.
        response = client.get(url, timeout=10, follow_redirects=True)
        response.raise_for_status()

        # Strip whitespace so the regex matches regardless of formatting.
        embed_html = response.text.replace(" ", "").replace("\n", "")

        # NOTE: some of the video were deleted so the embed html will just be "Filewasdeleted"
        vid = MP4_SERVER_JUICY_STREAM_REGEX.search(embed_html)
        if not vid:
            if embed_html == "Filewasdeleted":
                logger.debug(
                    "Failed to extract stream url from mp4-uploads. Reason: Filewasdeleted"
                )
                return
            logger.debug(
                f"Failed to extract stream url from mp4-uploads. Reason: unknown. Embed html: {embed_html}"
            )
            return
        return Server(
            name="mp4-upload",
            links=[EpisodeStream(link=vid.group(1), quality="1080")],
            episode_title=episode["notes"],
            headers={"Referer": "https://www.mp4upload.com/"},
        )
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisodeStreams
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class Smp4Extractor(BaseExtractor):
    """Resolves "S-mp4" sources to the sharepoint server."""

    @classmethod
    def extract(cls, url, client, episode_number, episode, source):
        """Fetch the "clock.json" twin of *url* and wrap every reported stream."""
        json_url = f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}"
        response = client.get(json_url, timeout=10)
        response.raise_for_status()
        streams: AllAnimeEpisodeStreams = response.json()

        episode_links = []
        for stream in streams["links"]:
            episode_links.append(
                EpisodeStream(
                    link=stream["link"],
                    quality="1080",
                    format=stream["resolutionStr"],
                )
            )
        return Server(
            name="sharepoint",
            links=episode_links,
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class SsHlsExtractor(BaseExtractor):
    # Stub extractor for the "Ss-Hls" (StreamSb) source; listed in
    # OTHER_SOURCES, so extract_server currently skips it.
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Fetch *url* directly and wrap the reported links in a Server."""
        # TODO: requires some serious work i think : )
        response = client.get(
            url,
            timeout=10,
        )
        response.raise_for_status()
        # fix: the old code did `streams = response.json()["links"]` and then
        # iterated `streams["links"]`, indexing "links" twice (TypeError on a
        # list). Index it once, like the sibling extractors do.
        streams = response.json()

        return Server(
            name="StreamSb",
            links=[
                EpisodeStream(link=link, quality="1080") for link in streams["links"]
            ],
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: requires some serious work i think : )
class VidMp4Extractor(BaseExtractor):
    # Stub extractor for the "Vid-mp4" source; listed in OTHER_SOURCES, so
    # extract_server currently skips it.
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Fetch the "clock.json" twin of *url* and wrap its links in a Server."""
        response = client.get(
            f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}",
            timeout=10,
        )
        response.raise_for_status()
        streams = response.json()

        # NOTE(review): sibling extractors read stream["link"] from each entry
        # of streams["links"]; here the raw entry is passed as the link --
        # confirm the payload shape.
        return Server(
            name="Vid-mp4",
            links=[
                EpisodeStream(link=link, quality="1080") for link in streams["links"]
            ],
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class KirExtractor(BaseExtractor):
    # "Kir" sources resolve to the weTransfer server (see SERVERS_AVAILABLE).
    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Fetch the "clock.json" twin of *url* and wrap its links in a Server."""
        response = client.get(
            f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}",
            timeout=10,
        )
        response.raise_for_status()
        streams = response.json()

        # NOTE(review): sibling extractors read stream["link"] from each entry
        # of streams["links"]; here the raw entry is passed as the link --
        # confirm the Kir payload really is a list of plain urls.
        return Server(
            name="weTransfer",
            links=[
                EpisodeStream(link=link, quality="1080") for link in streams["links"]
            ],
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisodeStreams
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class DefaultExtractor(BaseExtractor):
    """Resolves "Default" sources to the wixmp server."""

    @classmethod
    def extract(cls, url, client, episode_number, episode, source):
        """Fetch the "clock.json" twin of *url* and wrap every reported stream."""
        json_url = f"https://{API_BASE_URL}{url.replace('clock', 'clock.json')}"
        response = client.get(json_url, timeout=10)
        response.raise_for_status()
        streams: AllAnimeEpisodeStreams = response.json()

        episode_links = []
        for stream in streams["links"]:
            episode_links.append(
                EpisodeStream(
                    link=stream["link"], quality="1080", format=stream["resolutionStr"]
                )
            )
        return Server(
            name="wixmp",
            links=episode_links,
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
from ...types import EpisodeStream, Server
|
|
||||||
from ..constants import API_BASE_URL
|
|
||||||
from ..types import AllAnimeEpisode, AllAnimeSource
|
|
||||||
from .base import BaseExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class YtExtractor(BaseExtractor):
    """Resolves "Yt-mp4" sources: the url itself is already the stream."""

    @classmethod
    def extract(
        cls,
        url,
        client,
        episode_number: str,
        episode: AllAnimeEpisode,
        source: AllAnimeSource,
    ) -> Server:
        """Wrap *url* directly; no network round-trip is needed."""
        stream = EpisodeStream(link=url, quality="1080")
        return Server(
            name="Yt",
            links=[stream],
            episode_title=episode["notes"],
            headers={"Referer": f"https://{API_BASE_URL}/"},
        )
|
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
from typing import Union
|
|
||||||
|
|
||||||
from httpx import Response
|
|
||||||
|
|
||||||
from ..types import (
|
|
||||||
Anime,
|
|
||||||
AnimeEpisodes,
|
|
||||||
MediaTranslationType,
|
|
||||||
PageInfo,
|
|
||||||
SearchResult,
|
|
||||||
SearchResults,
|
|
||||||
)
|
|
||||||
from .types import AllAnimeSearchResults, AllAnimeShow
|
|
||||||
|
|
||||||
|
|
||||||
def generate_list(count: Union[int, str]) -> list[str]:
    """Return ["0", "1", ..., count-1]; *count* may be an int or numeric str."""
    return [str(i) for i in range(int(count))]
|
|
||||||
|
|
||||||
|
|
||||||
# Maps the API's translation-type strings to the provider-agnostic enum.
translation_type_map = {
    "sub": MediaTranslationType.SUB,
    "dub": MediaTranslationType.DUB,
    "raw": MediaTranslationType.RAW,
}
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_search_results(response: Response) -> SearchResults:
    """Convert a raw AllAnime search GraphQL response into SearchResults."""
    search_results: AllAnimeSearchResults = response.json()["data"]
    return SearchResults(
        page_info=PageInfo(total=search_results["shows"]["pageInfo"]["total"]),
        results=[
            SearchResult(
                id=result["_id"],
                title=result["name"],
                media_type=result["__typename"],
                # search only reports episode *counts*; synthesize the episode
                # number lists ("0".."n-1") from them.
                episodes=AnimeEpisodes(
                    sub=generate_list(result["availableEpisodes"]["sub"]),
                    dub=generate_list(result["availableEpisodes"]["dub"]),
                    raw=generate_list(result["availableEpisodes"]["raw"]),
                ),
            )
            for result in search_results["shows"]["edges"]
        ],
    )
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_anime_result(response: Response) -> Anime:
    """Convert a raw AllAnime "show" GraphQL response into an Anime."""
    anime: AllAnimeShow = response.json()["data"]["show"]
    return Anime(
        id=anime["_id"],
        title=anime["name"],
        # sort numerically so "10" follows "9". NOTE(review): key=float raises
        # on non-numeric episode strings -- confirm the API only emits numbers.
        episodes=AnimeEpisodes(
            sub=sorted(anime["availableEpisodesDetail"]["sub"], key=float),
            dub=sorted(anime["availableEpisodesDetail"]["dub"], key=float),
            raw=sorted(anime["availableEpisodesDetail"]["raw"], key=float),
        ),
        type=anime.get("__typename"),
    )
|
|
||||||
@@ -1,84 +0,0 @@
|
|||||||
import logging
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
from .....core.utils.graphql import execute_graphql
|
|
||||||
from ..base import BaseAnimeProvider
|
|
||||||
from ..utils.debug import debug_provider
|
|
||||||
from .constants import (
|
|
||||||
ANIME_GQL,
|
|
||||||
API_GRAPHQL_ENDPOINT,
|
|
||||||
API_GRAPHQL_HEADERS,
|
|
||||||
API_GRAPHQL_REFERER,
|
|
||||||
EPISODE_GQL,
|
|
||||||
SEARCH_GQL,
|
|
||||||
)
|
|
||||||
from .mappers import (
|
|
||||||
map_to_anime_result,
|
|
||||||
map_to_search_results,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from .types import AllAnimeEpisode
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AllAnime(BaseAnimeProvider):
    """Anime provider backed by the AllAnime GraphQL API."""

    # default headers for plain (non-GraphQL) requests made with self.client
    HEADERS = {"Referer": API_GRAPHQL_REFERER}

    @debug_provider
    def search(self, params):
        """Run the search query and map the response to SearchResults."""
        response = execute_graphql(
            API_GRAPHQL_ENDPOINT,
            self.client,
            SEARCH_GQL,
            variables={
                "search": {
                    "allowAdult": params.allow_nsfw,
                    "allowUnknown": params.allow_unknown,
                    "query": params.query,
                },
                "limit": params.page_limit,
                "page": params.current_page,
                "translationtype": params.translation_type,
                "countryorigin": params.country_of_origin,
            },
            headers=API_GRAPHQL_HEADERS
        )
        return map_to_search_results(response)

    @debug_provider
    def get(self, params):
        """Fetch a single show by id and map it to an Anime."""
        response = execute_graphql(
            API_GRAPHQL_ENDPOINT,
            self.client,
            ANIME_GQL,
            variables={"showId": params.id},
            headers=API_GRAPHQL_HEADERS
        )
        return map_to_anime_result(response)

    @debug_provider
    def episode_streams(self, params):
        """Yield a Server for every resolvable source of one episode."""
        # local import: the extractor table is only needed on this path
        from .extractors import extract_server

        episode_response = execute_graphql(
            API_GRAPHQL_ENDPOINT,
            self.client,
            EPISODE_GQL,
            variables={
                "showId": params.anime_id,
                "translationType": params.translation_type,
                "episodeString": params.episode,
            },
            headers=API_GRAPHQL_HEADERS
        )
        episode: AllAnimeEpisode = episode_response.json()["data"]["episode"]
        for source in episode["sourceUrls"]:
            # extract_server returns None for missing/ignored/unknown sources
            if server := extract_server(self.client, params.episode, episode, source):
                yield server
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from ..utils.debug import test_anime_provider
|
|
||||||
|
|
||||||
test_anime_provider(AllAnime)
|
|
||||||
@@ -1,111 +0,0 @@
|
|||||||
from enum import Enum
|
|
||||||
from typing import Literal, TypedDict
|
|
||||||
|
|
||||||
|
|
||||||
class Server(Enum):
    """Streaming servers AllAnime sources can resolve to."""

    SHAREPOINT = "sharepoint"
    DROPBOX = "dropbox"
    GOGOANIME = "gogoanime"
    WETRANSFER = "weTransfer"
    WIXMP = "wixmp"
    YT = "Yt"
    MP4_UPLOAD = "mp4-upload"


class AllAnimeEpisodesDetail(TypedDict):
    # episode-number strings per translation type (full show detail)
    dub: list[str]
    sub: list[str]
    raw: list[str]


class AllAnimeEpisodes(TypedDict):
    # episode *counts* per translation type (search results)
    dub: int
    sub: int
    raw: int


class AllAnimePageInfo(TypedDict):
    total: int


class AllAnimeShow(TypedDict):
    _id: str
    name: str
    availableEpisodesDetail: AllAnimeEpisodesDetail
    __typename: str


class AllAnimeSearchResult(TypedDict):
    _id: str
    name: str
    availableEpisodes: AllAnimeEpisodes
    __typename: str | None


class AllAnimeShows(TypedDict):
    pageInfo: AllAnimePageInfo
    edges: list[AllAnimeSearchResult]


class AllAnimeSearchResults(TypedDict):
    shows: AllAnimeShows


class AllAnimeSourceDownload(TypedDict):
    sourceName: str
    # NOTE(review): "dowloadUrl" looks like a typo for "downloadUrl"; kept
    # as-is -- confirm against the actual API payload before renaming.
    dowloadUrl: str


class AllAnimeSource(TypedDict):
    # server names exactly as the AllAnime API spells them
    sourceName: Literal[
        "Sak",
        "S-mp4",
        "Luf-mp4",
        "Default",
        "Yt-mp4",
        "Kir",
        "Mp4",
        "Ak",
        "Vid-mp4",
        "Ok",
        "Ss-Hls",
        "Fm-Hls",
    ]
    sourceUrl: str
    priority: float
    sandbox: str
    type: str
    className: str
    streamerId: str
    downloads: AllAnimeSourceDownload


class AllAnimeEpisodeStream(TypedDict):
    link: str
    hls: bool
    resolutionStr: str
    fromCache: str


class AllAnimeEpisodeStreams(TypedDict):
    links: list[AllAnimeEpisodeStream]


class AllAnimeEpisode(TypedDict):
    episodeString: str
    sourceUrls: list[AllAnimeSource]
    notes: str | None


# NOTE(review): plain class, not a TypedDict -- these are class-level
# annotations only, so instances carry no data. Probably meant TypedDict.
class AllAnimeStream:
    link: str
    mp4: bool
    hls: bool | None
    resolutionStr: str
    fromCache: str
    priority: int
    headers: dict | None


# NOTE(review): same as AllAnimeStream -- likely meant to be a TypedDict.
class AllAnimeStreams:
    links: list[AllAnimeStream]
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
import functools
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from itertools import cycle
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Lookup table used by decode_hex_string: maps each two-character hex code in
# an obfuscated AllAnime url back to the character it stands for.
hex_to_char = {
    "01": "9",
    "08": "0",
    "05": "=",
    "0a": "2",
    "0b": "3",
    "0c": "4",
    "07": "?",
    "00": "8",
    "5c": "d",
    "0f": "7",
    "5e": "f",
    "17": "/",
    "54": "l",
    "09": "1",
    "48": "p",
    "4f": "w",
    "0e": "6",
    "5b": "c",
    "5d": "e",
    "0d": "5",
    "53": "k",
    "1e": "&",
    "5a": "b",
    "59": "a",
    "4a": "r",
    "4c": "t",
    "4e": "v",
    "57": "o",
    "51": "i",
}
|
|
||||||
|
|
||||||
|
|
||||||
def debug_extractor(extractor_function):
    """Decorator: swallow and log extractor exceptions unless VIU_DEBUG is set.

    In normal operation an extractor failure must not abort episode-stream
    resolution, so the exception is logged and the wrapper implicitly returns
    None. With the VIU_DEBUG environment variable set, exceptions propagate so
    they can be debugged.
    """

    @functools.wraps(extractor_function)
    def _provider_function_wrapper(*args):
        if not os.environ.get("VIU_DEBUG"):
            try:
                return extractor_function(*args)
            except Exception as e:
                # args[3] is the AllAnimeSource mapping (see extract_server's
                # call). fix: logger.exception keeps the traceback that
                # logger.error silently dropped.
                logger.exception(
                    f"[AllAnime@Server={args[3].get('sourceName', 'UNKNOWN')}]: {e}"
                )
        else:
            return extractor_function(*args)

    return _provider_function_wrapper
|
|
||||||
|
|
||||||
|
|
||||||
def give_random_quality(links):
|
|
||||||
qualities = cycle(["1080", "720", "480", "360"])
|
|
||||||
|
|
||||||
return [
|
|
||||||
{**episode_stream, "quality": quality}
|
|
||||||
for episode_stream, quality in zip(links, qualities, strict=False)
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def one_digit_symmetric_xor(password: int, target: str):
    """Decode *target* (a hex string) by XOR-ing every byte with *password*.

    XOR with a one-byte key is its own inverse, hence "symmetric". Returns
    the decoded UTF-8 string.
    """
    decoded = bytes(byte ^ password for byte in bytearray.fromhex(target))
    return decoded.decode("utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def decode_hex_string(hex_string):
    """Decode an AllAnime hex-obfuscated string.

    Some sources encrypt their urls as a run of two-character hex codes; each
    pair is mapped back to a character via ``hex_to_char``. Pairs without a
    known mapping are passed through unchanged.

    Args:
        hex_string: the obfuscated string, e.g. "175c" -> "/d".

    Returns:
        The decoded string.
    """
    # Split the hex string into pairs of characters.
    hex_pairs = re.findall("..", hex_string)

    # Decode each pair, falling back to the pair itself when it is unknown.
    decoded_chars = [hex_to_char.get(pair.lower(), pair) for pair in hex_pairs]

    return "".join(decoded_chars)
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
# animepahe site endpoints
ANIMEPAHE = "animepahe.pw"
ANIMEPAHE_BASE = f"https://{ANIMEPAHE}"
ANIMEPAHE_ENDPOINT = f"{ANIMEPAHE_BASE}/api"
# kwik hosts the actual video embeds/streams for animepahe
CDN_PROVIDER = "kwik.cx"
CDN_PROVIDER_BASE = f"https://{CDN_PROVIDER}"

SERVERS_AVAILABLE = ["kwik"]
|
|
||||||
REQUEST_HEADERS = {
|
|
||||||
"Cookie": "__ddgid_=VvX0ebHrH2DsFZo4; __ddgmark_=3savRpSVFhvZcn5x; __ddg2_=buBJ3c4pNBYKFZNp; __ddg1_=rbVADKr9URtt55zoIGFa; SERVERID=janna; XSRF-TOKEN=eyJpdiI6IjV5bFNtd0phUHgvWGJxc25wL0VJSUE9PSIsInZhbHVlIjoicEJTZktlR2hxR2JZTWhnL0JzazlvZU5TQTR2bjBWZ2dDb0RwUXVUUWNSclhQWUhLRStYSmJmWmUxWkpiYkFRYU12RjFWejlSWHorME1wZG5qQ1U0TnFlNnBFR2laQjN1MjdyNjc5TjVPdXdJb2o5VkU1bEduRW9pRHNDTHh6Sy8iLCJtYWMiOiI0OTc0ZmNjY2UwMGJkOWY2MWNkM2NlMjk2ZGMyZGJmMWE0NTdjZTdkNGI2Y2IwNTIzZmFiZWU5ZTE2OTk0YmU4IiwidGFnIjoiIn0%3D; laravel_session=eyJpdiI6ImxvdlpqREFnTjdaeFJubUlXQWlJVWc9PSIsInZhbHVlIjoiQnE4R3VHdjZ4M1NDdEVWM1ZqMUxtNnVERnJCcmtCUHZKNzRPR2RFbzNFcStTL29xdnVTbWhsNVRBUXEybVZWNU1UYVlTazFqYlN5UjJva1k4czNGaXBTbkJJK01oTUd3VHRYVHBoc3dGUWxHYnFlS2NJVVNFbTFqMVBWdFpuVUgiLCJtYWMiOiI1NDdjZTVkYmNhNjUwZTMxZmRlZmVmMmRlMGNiYjAwYjlmYjFjY2U0MDc1YTQzZThiMTIxMjJlYTg1NTA4YjBmIiwidGFnIjoiIn0%3D; latest=5592",
|
|
||||||
"Host": ANIMEPAHE,
|
|
||||||
"Accept": "application, text/javascript, */*; q=0.01",
|
|
||||||
"Accept-Encoding": "Utf-8",
|
|
||||||
"Referer": ANIMEPAHE_BASE,
|
|
||||||
"DNT": "1",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Sec-Fetch-Dest": "empty",
|
|
||||||
"Sec-Fetch-Site": "same-origin",
|
|
||||||
"Sec-Fetch-Mode": "cors",
|
|
||||||
"TE": "trailers",
|
|
||||||
}
|
|
||||||
SERVER_HEADERS = {
|
|
||||||
"Host": "kwik.cx",
|
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8",
|
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding": "Utf-8",
|
|
||||||
"DNT": "1",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Referer": ANIMEPAHE_BASE + "/",
|
|
||||||
"Upgrade-Insecure-Requests": "1",
|
|
||||||
"Sec-Fetch-Dest": "iframe",
|
|
||||||
"Sec-Fetch-Mode": "navigate",
|
|
||||||
"Sec-Fetch-Site": "cross-site",
|
|
||||||
"Priority": "u=4",
|
|
||||||
"TE": "trailers",
|
|
||||||
}
|
|
||||||
|
|
||||||
STREAM_HEADERS = {
|
|
||||||
# "Host": "vault-16.owocdn.top", # This will have to be the actual host of the stream (behind Kwik)
|
|
||||||
"Accept": "*/*",
|
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
|
||||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
|
||||||
"Origin": CDN_PROVIDER_BASE,
|
|
||||||
"Sec-GPC": "1",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Referer": CDN_PROVIDER_BASE + "/",
|
|
||||||
"Sec-Fetch-Dest": "empty",
|
|
||||||
"Sec-Fetch-Mode": "cors",
|
|
||||||
"Sec-Fetch-Site": "cross-site",
|
|
||||||
"TE": "trailers",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Pulls the stream url out of the decoded kwik player js: source='<url>';
JUICY_STREAM_REGEX = re.compile(r"source='(.*)';")
# Matches the packed-player keyword blob that starts with "Player|"
KWIK_RE = re.compile(r"Player\|(.+?)'")
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def animepahe_key_creator(c: int, a: int):
    """Python port of the key generator inside animepahe's packed js.

    Recursively renders *c* in base *a*; digits above 35 become the character
    the js `String.fromCharCode(c + 29)` would produce, the rest use base-36
    digits via encode_base_n.
    """
    from ...scraping.utils import encode_base_n

    if c < a:
        val_a = ""
    else:
        # higher-order digits first
        val_a = animepahe_key_creator(int(c / a), a)
    c = c % a
    if c > 35:
        val_b = chr(c + 29)
    else:
        val_b = encode_base_n(c, 36)
    return val_a + val_b
|
|
||||||
|
|
||||||
|
|
||||||
def animepahe_embed_decoder(
    encoded_js_p: str,
    base_a: int,
    no_of_keys_c: int,
    values_to_replace_with_k: list,
):
    """Unpack eval(function(p,a,c,k,e,d){...})-style packed javascript.

    Builds the key -> original-word table the same way the packer's js builds
    `d`, then substitutes every word token in the packed code *encoded_js_p*.

    NOTE(review): the substitution lambda indexes decode_mapper_d directly, so
    a word token that is not a generated key raises KeyError -- confirm the
    packed input can never contain stray identifiers.
    """
    decode_mapper_d: dict = {}
    for i in range(no_of_keys_c):
        key = animepahe_key_creator(i, base_a)
        # the packer stores "" for words equal to their own key
        val = values_to_replace_with_k[i] or key
        decode_mapper_d[key] = val
    return re.sub(
        r"\b\w+\b", lambda match: decode_mapper_d[match.group(0)], encoded_js_p
    )
|
|
||||||
|
|
||||||
|
|
||||||
# Matches the argument list of the eval(function(p,a,c,k,e,d){...}(...)) wrapper.
PARAMETERS_REGEX = re.compile(r"eval\(function\(p,a,c,k,e,d\)\{.*\}\((.*?)\)\)$")
# Splits those arguments into (p, a, c, k): packed code, base, count, word list.
ENCODE_JS_REGEX = re.compile(r"'(.*?);',(\d+),(\d+),'(.*)'\.split")


def process_animepahe_embed_page(embed_page: str):
    """Locate and decode the packed player script inside a kwik embed page.

    Walks through up to 8 <script> tags, takes the first one with text,
    extracts the packer's p/a/c/k arguments and returns the decoded js with
    backslashes stripped. Returns None when any step finds nothing.
    """
    from ...scraping.html_parser import get_element_text_and_html_by_tag

    encoded_js_string = ""
    embed_page_content = embed_page
    for _ in range(8):
        text, html = get_element_text_and_html_by_tag("script", embed_page_content)
        if not text and html:
            # empty script tag: remove it and inspect the next one
            embed_page_content = re.sub(html, "", embed_page_content)
            continue
        if text:
            encoded_js_string = text.strip()
            break
    if not encoded_js_string:
        return
    obsfucated_js_parameter_match = PARAMETERS_REGEX.search(encoded_js_string)
    if not obsfucated_js_parameter_match:
        return
    parameter_string = obsfucated_js_parameter_match.group(1)
    encoded_js_parameter_string = ENCODE_JS_REGEX.search(parameter_string)
    if not encoded_js_parameter_string:
        return
    p: str = encoded_js_parameter_string.group(1)
    a: int = int(encoded_js_parameter_string.group(2))
    c: int = int(encoded_js_parameter_string.group(3))
    k: list = encoded_js_parameter_string.group(4).split("|")
    return animepahe_embed_decoder(p, a, c, k).replace("\\", "")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Testing time
|
|
||||||
filepath = input("Enter file name: ")
|
|
||||||
if filepath:
|
|
||||||
with open(filepath) as file:
|
|
||||||
data = file.read()
|
|
||||||
else:
|
|
||||||
data = """<script>eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('f $7={H:a(2){4 B(9.7.h(y z("(?:(?:^|.*;)\\\\s*"+d(2).h(/[\\-\\.\\+\\*]/g,"\\\\$&")+"\\\\s*\\\\=\\\\s*([^;]*).*$)|^.*$"),"$1"))||G},E:a(2,q,3,6,5,t){k(!2||/^(?:8|r\\-v|o|m|p)$/i.D(2)){4 w}f b="";k(3){F(3.J){j K:b=3===P?"; 8=O, I N Q M:u:u A":"; r-v="+3;n;j L:b="; 8="+3;n;j S:b="; 8="+3.Z();n}}9.7=d(2)+"="+d(q)+b+(5?"; m="+5:"")+(6?"; o="+6:"")+(t?"; p":"");4 x},Y:a(2,6,5){k(!2||!11.C(2)){4 w}9.7=d(2)+"=; 8=12, R 10 W l:l:l A"+(5?"; m="+5:"")+(6?"; o="+6:"");4 x},C:a(2){4(y z("(?:^|;\\\\s*)"+d(2).h(/[\\-\\.\\+\\*]/g,"\\\\$&")+"\\\\s*\\\\=")).D(9.7)},X:a(){f c=9.7.h(/((?:^|\\s*;)[^\\=]+)(?=;|$)|^\\s*|\\s*(?:\\=[^;]*)?(?:\\1|$)/g,"").T(/\\s*(?:\\=[^;]*)?;\\s*/);U(f e=0;e<c.V;e++){c[e]=B(c[e])}4 c}};',62,65,'||sKey|vEnd|return|sDomain|sPath|cookie|expires|document|function|sExpires|aKeys|encodeURIComponent|nIdx|var||replace||case|if|00|domain|break|path|secure|sValue|max||bSecure|59|age|false|true|new|RegExp|GMT|decodeURIComponent|hasItem|test|setItem|switch|null|getItem|31|constructor|Number|String|23|Dec|Fri|Infinity|9999|01|Date|split|for|length|1970|keys|removeItem|toUTCString|Jan|this|Thu'.split('|'),0,{}));eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('h o=\'1D://1C-E.1B.1A.1z/1y/E/1x/1w/1v.1u\';h d=s.r(\'d\');h 0=B 
1t(d,{\'1s\':{\'1r\':i},\'1q\':\'16:9\',\'D\':1,\'1p\':5,\'1o\':{\'1n\':\'1m\'},1l:[\'7-1k\',\'7\',\'1j\',\'1i-1h\',\'1g\',\'1f-1e\',\'1d\',\'D\',\'1c\',\'1b\',\'1a\',\'19\',\'C\',\'18\'],\'C\':{\'17\':i}});8(!A.15()){d.14=o}x{j z={13:12,11:10,Z:Y,X:i,W:i};h c=B A(z);c.V(o);c.U(d);g.c=c}0.3("T",6=>{g.S.R.Q("P")});0.O=1;k v(b,n,m){8(b.y){b.y(n,m,N)}x 8(b.w){b.w(\'3\'+n,m)}}j 4=k(l){g.M.L(l,\'*\')};v(g,\'l\',k(e){j a=e.a;8(a===\'7\')0.7();8(a===\'f\')0.f();8(a===\'u\')0.u()});0.3(\'t\',6=>{4(\'t\')});0.3(\'7\',6=>{4(\'7\')});0.3(\'f\',6=>{4(\'f\')});0.3(\'K\',6=>{4(0.q);s.r(\'.J-I\').H=G(0.q.F(2))});0.3(\'p\',6=>{4(\'p\')});',62,102,'player|||on|sendMessage||event|play|if||data|element|hls|video||pause|window|const|true|var|function|message|eventHandler|eventName|source|ended|currentTime|querySelector|document|ready|stop|bindEvent|attachEvent|else|addEventListener|config|Hls|new|fullscreen|volume|01|toFixed|String|innerHTML|timestamp|ss|timeupdate|postMessage|parent|false|speed|landscape|lock|orientation|screen|enterfullscreen|attachMedia|loadSource|lowLatencyMode|enableWorker|Infinity|backBufferLength|600|maxMaxBufferLength|180|maxBufferLength|src|isSupported||iosNative|capture|airplay|pip|settings|captions|mute|time|current|progress|forward|fast|rewind|large|controls|kwik|key|storage|seekTime|ratio|global|keyboard|Plyr|m3u8|uwu|b92a392054c041a3f9c6eecabeb0e127183f44e547828447b10bca8d77523e6f|03|stream|org|nextcdn|files|eu|https'.split('|'),0,{}))</script>"""
|
|
||||||
|
|
||||||
print(process_animepahe_embed_page(data))
|
|
||||||
@@ -1,113 +0,0 @@
|
|||||||
import logging
|
|
||||||
|
|
||||||
from ..types import (
|
|
||||||
Anime,
|
|
||||||
AnimeEpisodeInfo,
|
|
||||||
AnimeEpisodes,
|
|
||||||
EpisodeStream,
|
|
||||||
MediaTranslationType,
|
|
||||||
PageInfo,
|
|
||||||
SearchResult,
|
|
||||||
SearchResults,
|
|
||||||
Server,
|
|
||||||
)
|
|
||||||
from .types import (
|
|
||||||
AnimePaheAnimePage,
|
|
||||||
AnimePaheSearchPage,
|
|
||||||
)
|
|
||||||
|
|
||||||
translation_type_map = {
|
|
||||||
"sub": MediaTranslationType.SUB,
|
|
||||||
"dub": MediaTranslationType.DUB,
|
|
||||||
"raw": MediaTranslationType.RAW,
|
|
||||||
}
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_search_results(data: AnimePaheSearchPage) -> SearchResults:
|
|
||||||
results = []
|
|
||||||
for result in data["data"]:
|
|
||||||
results.append(
|
|
||||||
SearchResult(
|
|
||||||
id=result["session"],
|
|
||||||
title=result["title"],
|
|
||||||
episodes=AnimeEpisodes(
|
|
||||||
sub=list(map(str, range(1, result["episodes"] + 1))),
|
|
||||||
dub=list(map(str, range(1, result["episodes"] + 1))),
|
|
||||||
raw=list(map(str, range(1, result["episodes"] + 1))),
|
|
||||||
),
|
|
||||||
media_type=result["type"],
|
|
||||||
score=result["score"],
|
|
||||||
status=result["status"],
|
|
||||||
season=result["season"],
|
|
||||||
poster=result["poster"],
|
|
||||||
year=str(result["year"]),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return SearchResults(
|
|
||||||
page_info=PageInfo(
|
|
||||||
total=data["total"],
|
|
||||||
per_page=data["per_page"],
|
|
||||||
current_page=data["current_page"],
|
|
||||||
),
|
|
||||||
results=results,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_anime_result(
|
|
||||||
search_result: SearchResult, anime: AnimePaheAnimePage
|
|
||||||
) -> Anime:
|
|
||||||
episodes_info = []
|
|
||||||
episodes = []
|
|
||||||
anime["data"] = sorted(anime["data"], key=lambda k: float(k["episode"]))
|
|
||||||
for ep_info in anime["data"]:
|
|
||||||
episodes.append(str(ep_info["episode"]))
|
|
||||||
episodes_info.append(
|
|
||||||
AnimeEpisodeInfo(
|
|
||||||
id=str(ep_info["id"]),
|
|
||||||
session_id=ep_info["session"],
|
|
||||||
episode=str(ep_info["episode"]),
|
|
||||||
title=ep_info["title"],
|
|
||||||
poster=ep_info["snapshot"],
|
|
||||||
duration=str(ep_info["duration"]),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return Anime(
|
|
||||||
id=search_result.id,
|
|
||||||
title=search_result.title,
|
|
||||||
episodes=AnimeEpisodes(
|
|
||||||
sub=episodes,
|
|
||||||
dub=episodes,
|
|
||||||
),
|
|
||||||
year=str(search_result.year),
|
|
||||||
poster=search_result.poster,
|
|
||||||
episodes_info=episodes_info,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_server(
|
|
||||||
episode: AnimeEpisodeInfo,
|
|
||||||
translation_type: str,
|
|
||||||
stream_links: list[tuple[str, str]],
|
|
||||||
headers: dict[str, str],
|
|
||||||
) -> Server:
|
|
||||||
links = [
|
|
||||||
EpisodeStream(
|
|
||||||
link=link[1],
|
|
||||||
quality=link[0] if link[0] in ["360", "480", "720", "1080"] else "1080", # type:ignore
|
|
||||||
translation_type=translation_type_map[translation_type],
|
|
||||||
)
|
|
||||||
for link in stream_links
|
|
||||||
]
|
|
||||||
|
|
||||||
# sort links by quality, best to worst
|
|
||||||
links.sort(key=lambda x: int(x.quality), reverse=True)
|
|
||||||
logger.debug(f"Aggregated links: {links}")
|
|
||||||
|
|
||||||
return Server(
|
|
||||||
name="kwik", links=links, episode_title=episode.title, headers=headers
|
|
||||||
)
|
|
||||||
|
|
||||||
@@ -1,214 +0,0 @@
|
|||||||
import logging
|
|
||||||
from functools import lru_cache
|
|
||||||
from typing import Iterator, Optional
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from ..base import BaseAnimeProvider
|
|
||||||
from ..params import AnimeParams, EpisodeStreamsParams, SearchParams
|
|
||||||
from ..types import Anime, AnimeEpisodeInfo, SearchResult, SearchResults, Server
|
|
||||||
from ..utils.debug import debug_provider
|
|
||||||
from .constants import (
|
|
||||||
ANIMEPAHE_BASE,
|
|
||||||
ANIMEPAHE_ENDPOINT,
|
|
||||||
CDN_PROVIDER,
|
|
||||||
JUICY_STREAM_REGEX,
|
|
||||||
REQUEST_HEADERS,
|
|
||||||
SERVER_HEADERS,
|
|
||||||
STREAM_HEADERS,
|
|
||||||
)
|
|
||||||
from .extractor import process_animepahe_embed_page
|
|
||||||
from .mappers import map_to_anime_result, map_to_search_results, map_to_server
|
|
||||||
from .types import AnimePaheAnimePage, AnimePaheSearchPage
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePahe(BaseAnimeProvider):
|
|
||||||
HEADERS = REQUEST_HEADERS
|
|
||||||
|
|
||||||
@debug_provider
|
|
||||||
def search(self, params: SearchParams) -> SearchResults | None:
|
|
||||||
return self._search(params)
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _search(self, params: SearchParams) -> SearchResults | None:
|
|
||||||
url_params = {"m": "search", "q": params.query}
|
|
||||||
response = self.client.get(ANIMEPAHE_ENDPOINT, params=url_params)
|
|
||||||
response.raise_for_status()
|
|
||||||
data: AnimePaheSearchPage = response.json()
|
|
||||||
if not data.get("data"):
|
|
||||||
return
|
|
||||||
return map_to_search_results(data)
|
|
||||||
|
|
||||||
@debug_provider
|
|
||||||
def get(self, params: AnimeParams) -> Anime | None:
|
|
||||||
return self._get_anime(params)
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _get_anime(self, params: AnimeParams) -> Anime | None:
|
|
||||||
page = 1
|
|
||||||
standardized_episode_number = 0
|
|
||||||
|
|
||||||
search_result = self._get_search_result(params)
|
|
||||||
if not search_result:
|
|
||||||
logger.error(f"No search result found for ID {params.id}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
anime: Optional[AnimePaheAnimePage] = None
|
|
||||||
|
|
||||||
has_next_page = True
|
|
||||||
while has_next_page:
|
|
||||||
logger.debug(f"Loading page: {page}")
|
|
||||||
_anime_page = self._anime_page_loader(
|
|
||||||
m="release",
|
|
||||||
id=params.id,
|
|
||||||
sort="episode_asc",
|
|
||||||
page=page,
|
|
||||||
)
|
|
||||||
|
|
||||||
has_next_page = True if _anime_page["next_page_url"] else False
|
|
||||||
page += 1
|
|
||||||
if not anime:
|
|
||||||
anime = _anime_page
|
|
||||||
else:
|
|
||||||
anime["data"].extend(_anime_page["data"])
|
|
||||||
|
|
||||||
if anime:
|
|
||||||
for episode in anime.get("data", []):
|
|
||||||
if episode["episode"] % 1 == 0:
|
|
||||||
standardized_episode_number += 1
|
|
||||||
episode.update({"episode": standardized_episode_number})
|
|
||||||
else:
|
|
||||||
standardized_episode_number += episode["episode"] % 1
|
|
||||||
episode.update({"episode": standardized_episode_number})
|
|
||||||
standardized_episode_number = int(standardized_episode_number)
|
|
||||||
|
|
||||||
return map_to_anime_result(search_result, anime)
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _get_search_result(self, params: AnimeParams) -> Optional[SearchResult]:
|
|
||||||
search_results = self._search(SearchParams(query=params.query))
|
|
||||||
if not search_results or not search_results.results:
|
|
||||||
logger.error(f"No search results found for ID {params.id}")
|
|
||||||
return None
|
|
||||||
for search_result in search_results.results:
|
|
||||||
if search_result.id == params.id:
|
|
||||||
return search_result
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _anime_page_loader(self, m, id, sort, page) -> AnimePaheAnimePage:
|
|
||||||
url_params = {
|
|
||||||
"m": m,
|
|
||||||
"id": id,
|
|
||||||
"sort": sort,
|
|
||||||
"page": page,
|
|
||||||
}
|
|
||||||
response = self.client.get(ANIMEPAHE_ENDPOINT, params=url_params)
|
|
||||||
response.raise_for_status()
|
|
||||||
return response.json()
|
|
||||||
|
|
||||||
@debug_provider
|
|
||||||
def episode_streams(self, params: EpisodeStreamsParams) -> Iterator[Server] | None:
|
|
||||||
from ...scraping.html_parser import (
|
|
||||||
extract_attributes,
|
|
||||||
get_element_by_id,
|
|
||||||
get_elements_html_by_class,
|
|
||||||
)
|
|
||||||
|
|
||||||
episode = self._get_episode_info(params)
|
|
||||||
if not episode:
|
|
||||||
logger.error(
|
|
||||||
f"Episode {params.episode} doesn't exist for anime {params.anime_id}"
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
url = f"{ANIMEPAHE_BASE}/play/{params.anime_id}/{episode.session_id}"
|
|
||||||
response = self.client.get(url, follow_redirects=True)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
c = get_element_by_id("resolutionMenu", response.text)
|
|
||||||
if not c:
|
|
||||||
logger.error("Resolution menu not found in the response")
|
|
||||||
return
|
|
||||||
resolutionMenuItems = get_elements_html_by_class("dropdown-item", c)
|
|
||||||
res_dicts = [extract_attributes(item) for item in resolutionMenuItems]
|
|
||||||
quality = None
|
|
||||||
translation_type = None
|
|
||||||
stream_links = []
|
|
||||||
stream_host = None
|
|
||||||
|
|
||||||
# TODO: better document the scraping process
|
|
||||||
for res_dict in res_dicts:
|
|
||||||
# the actual attributes are data attributes in the original html 'prefixed with data-'
|
|
||||||
embed_url = res_dict["src"]
|
|
||||||
logger.debug(f"Found embed url: {embed_url}")
|
|
||||||
data_audio = "dub" if res_dict["audio"] == "eng" else "sub"
|
|
||||||
|
|
||||||
if data_audio != params.translation_type:
|
|
||||||
logger.debug(f"Found {data_audio} but wanted {params.translation_type}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not embed_url:
|
|
||||||
logger.warning("embed url not found please report to the developers")
|
|
||||||
continue
|
|
||||||
|
|
||||||
embed_response = self.client.get(
|
|
||||||
embed_url,
|
|
||||||
headers={
|
|
||||||
"User-Agent": self.client.headers["User-Agent"],
|
|
||||||
**SERVER_HEADERS,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
embed_response.raise_for_status()
|
|
||||||
embed_page = embed_response.text
|
|
||||||
logger.debug("Processing embed page for JS decoding")
|
|
||||||
|
|
||||||
decoded_js = process_animepahe_embed_page(embed_page)
|
|
||||||
if not decoded_js:
|
|
||||||
logger.error("failed to decode embed page")
|
|
||||||
continue
|
|
||||||
logger.debug(f"Decoded JS: {decoded_js[:100]}...")
|
|
||||||
juicy_stream = JUICY_STREAM_REGEX.search(decoded_js)
|
|
||||||
if not juicy_stream:
|
|
||||||
logger.error("failed to find juicy stream")
|
|
||||||
continue
|
|
||||||
logger.debug(f"Found juicy stream: {juicy_stream.group(1)}")
|
|
||||||
juicy_stream = juicy_stream.group(1)
|
|
||||||
stream_host = urlparse(juicy_stream).hostname
|
|
||||||
quality = res_dict["resolution"]
|
|
||||||
logger.debug(f"Found quality: {quality}")
|
|
||||||
translation_type = data_audio
|
|
||||||
stream_links.append((quality, juicy_stream))
|
|
||||||
|
|
||||||
if translation_type and stream_links:
|
|
||||||
headers = {
|
|
||||||
"User-Agent": self.client.headers["User-Agent"],
|
|
||||||
"Host": stream_host or CDN_PROVIDER,
|
|
||||||
**STREAM_HEADERS,
|
|
||||||
}
|
|
||||||
yield map_to_server(
|
|
||||||
episode, translation_type, stream_links, headers=headers
|
|
||||||
)
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _get_episode_info(
|
|
||||||
self, params: EpisodeStreamsParams
|
|
||||||
) -> Optional[AnimeEpisodeInfo]:
|
|
||||||
anime_info = self._get_anime(
|
|
||||||
AnimeParams(id=params.anime_id, query=params.query)
|
|
||||||
)
|
|
||||||
if not anime_info:
|
|
||||||
logger.error(f"No anime info for {params.anime_id}")
|
|
||||||
return
|
|
||||||
if not anime_info.episodes_info:
|
|
||||||
logger.error(f"No episodes info for {params.anime_id}")
|
|
||||||
return
|
|
||||||
for episode in anime_info.episodes_info:
|
|
||||||
if episode.episode == params.episode:
|
|
||||||
return episode
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from ..utils.debug import test_anime_provider
|
|
||||||
|
|
||||||
test_anime_provider(AnimePahe)
|
|
||||||
@@ -1,108 +0,0 @@
|
|||||||
from enum import Enum
|
|
||||||
from typing import Literal, TypedDict
|
|
||||||
|
|
||||||
|
|
||||||
class Server(Enum):
|
|
||||||
KWIK = "Kwik"
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheSearchResult(TypedDict):
|
|
||||||
id: str
|
|
||||||
title: str
|
|
||||||
type: str
|
|
||||||
episodes: int
|
|
||||||
status: str
|
|
||||||
season: str
|
|
||||||
year: int
|
|
||||||
score: int
|
|
||||||
poster: str
|
|
||||||
session: str
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheSearchPage(TypedDict):
|
|
||||||
total: int
|
|
||||||
per_page: int
|
|
||||||
current_page: int
|
|
||||||
last_page: int
|
|
||||||
_from: int
|
|
||||||
to: int
|
|
||||||
data: list[AnimePaheSearchResult]
|
|
||||||
|
|
||||||
|
|
||||||
class Episode(TypedDict):
|
|
||||||
id: str
|
|
||||||
anime_id: int
|
|
||||||
episode: float
|
|
||||||
episode2: int
|
|
||||||
edition: str
|
|
||||||
title: str
|
|
||||||
snapshot: str # episode image
|
|
||||||
disc: str
|
|
||||||
audio: Literal["eng", "jpn"]
|
|
||||||
duration: str # time 00:00:00
|
|
||||||
session: str
|
|
||||||
filler: int
|
|
||||||
created_at: str
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheAnimePage(TypedDict):
|
|
||||||
total: int
|
|
||||||
per_page: int
|
|
||||||
current_page: int
|
|
||||||
last_page: int
|
|
||||||
next_page_url: str | None
|
|
||||||
prev_page_url: str | None
|
|
||||||
_from: int
|
|
||||||
to: int
|
|
||||||
data: list[Episode]
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheEpisodeInfo(TypedDict):
|
|
||||||
title: str
|
|
||||||
episode: float
|
|
||||||
id: str
|
|
||||||
translation_type: Literal["eng", "jpn"]
|
|
||||||
duration: str
|
|
||||||
poster: str
|
|
||||||
|
|
||||||
|
|
||||||
class AvailableEpisodesDetail(TypedDict):
|
|
||||||
sub: list[str]
|
|
||||||
dub: list[str]
|
|
||||||
raw: list[str]
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheAnime(TypedDict):
|
|
||||||
id: str
|
|
||||||
title: str
|
|
||||||
year: int
|
|
||||||
season: str
|
|
||||||
poster: str
|
|
||||||
score: int
|
|
||||||
availableEpisodesDetail: AvailableEpisodesDetail
|
|
||||||
episodesInfo: list[AnimePaheEpisodeInfo]
|
|
||||||
|
|
||||||
|
|
||||||
class PageInfo(TypedDict):
|
|
||||||
total: int
|
|
||||||
perPage: int
|
|
||||||
currentPage: int
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheSearchResults(TypedDict):
|
|
||||||
pageInfo: PageInfo
|
|
||||||
results: list[AnimePaheSearchResult]
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheStreamLink(TypedDict):
|
|
||||||
quality: str
|
|
||||||
translation_type: Literal["sub", "dub"]
|
|
||||||
link: str
|
|
||||||
|
|
||||||
|
|
||||||
class AnimePaheServer(TypedDict):
|
|
||||||
server: Literal["kwik"]
|
|
||||||
links: list[AnimePaheStreamLink]
|
|
||||||
episode_title: str
|
|
||||||
subtitles: list
|
|
||||||
headers: dict
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
ANIMEUNITY = "animeunity.so"
|
|
||||||
ANIMEUNITY_BASE = f"https://www.{ANIMEUNITY}"
|
|
||||||
|
|
||||||
MAX_TIMEOUT = 10
|
|
||||||
TOKEN_REGEX = re.compile(r'<meta.*?name="csrf-token".*?content="([^"]*)".*?>')
|
|
||||||
|
|
||||||
REPLACEMENT_WORDS = {"Season ": "", "Cour": "Part"}
|
|
||||||
|
|
||||||
# Server Specific
|
|
||||||
AVAILABLE_VIDEO_QUALITY = ["1080", "720", "480"]
|
|
||||||
VIDEO_INFO_REGEX = re.compile(r"window.video\s*=\s*(\{[^\}]*\})")
|
|
||||||
VIDEO_INFO_CLEAN_REGEX = re.compile(r'(?<!["\'])(\b\w+\b)(?=\s*:)')
|
|
||||||
DOWNLOAD_FILENAME_REGEX = re.compile(r"[?&]filename=([^&]+)")
|
|
||||||
QUALITY_REGEX = re.compile(r"/(\d{3,4}p)")
|
|
||||||
DOWNLOAD_URL_REGEX = re.compile(r"window.downloadUrl\s*=\s*'([^']*)'")
|
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
import logging
|
|
||||||
|
|
||||||
from .constants import (
|
|
||||||
DOWNLOAD_FILENAME_REGEX,
|
|
||||||
DOWNLOAD_URL_REGEX,
|
|
||||||
QUALITY_REGEX,
|
|
||||||
VIDEO_INFO_CLEAN_REGEX,
|
|
||||||
VIDEO_INFO_REGEX,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_server_info(html_content: str, episode_title: str | None) -> dict | None:
|
|
||||||
"""
|
|
||||||
Extracts server information from the VixCloud/AnimeUnity embed page.
|
|
||||||
Handles extraction from both window.video object and download URL.
|
|
||||||
"""
|
|
||||||
video_info = VIDEO_INFO_REGEX.search(html_content)
|
|
||||||
download_url_match = DOWNLOAD_URL_REGEX.search(html_content)
|
|
||||||
|
|
||||||
if not (download_url_match and video_info):
|
|
||||||
return None
|
|
||||||
|
|
||||||
info_str = VIDEO_INFO_CLEAN_REGEX.sub(r'"\1"', video_info.group(1))
|
|
||||||
|
|
||||||
# Use eval context for JS constants
|
|
||||||
ctx = {"null": None, "true": True, "false": False}
|
|
||||||
try:
|
|
||||||
info = eval(info_str, ctx)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to parse JS object: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
download_url = download_url_match.group(1)
|
|
||||||
info["link"] = download_url
|
|
||||||
|
|
||||||
# Extract metadata from download URL if missing in window.video
|
|
||||||
if filename_match := DOWNLOAD_FILENAME_REGEX.search(download_url):
|
|
||||||
info["name"] = filename_match.group(1)
|
|
||||||
else:
|
|
||||||
info["name"] = f"{episode_title or 'Unknown'}"
|
|
||||||
|
|
||||||
if quality_match := QUALITY_REGEX.search(download_url):
|
|
||||||
# "720p" -> 720
|
|
||||||
info["quality"] = int(quality_match.group(1)[:-1])
|
|
||||||
else:
|
|
||||||
info["quality"] = 0 # Fallback
|
|
||||||
|
|
||||||
return info
|
|
||||||
@@ -1,133 +0,0 @@
|
|||||||
from typing import Literal
|
|
||||||
|
|
||||||
from ..types import (
|
|
||||||
Anime,
|
|
||||||
AnimeEpisodeInfo,
|
|
||||||
AnimeEpisodes,
|
|
||||||
EpisodeStream,
|
|
||||||
MediaTranslationType,
|
|
||||||
PageInfo,
|
|
||||||
SearchResult,
|
|
||||||
SearchResults,
|
|
||||||
Server,
|
|
||||||
)
|
|
||||||
from .constants import AVAILABLE_VIDEO_QUALITY
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_search_results(
|
|
||||||
data: dict, translation_type: Literal["sub", "dub"]
|
|
||||||
) -> SearchResults:
|
|
||||||
results = []
|
|
||||||
for result in data:
|
|
||||||
mapped_result = map_to_search_result(result, translation_type)
|
|
||||||
if mapped_result:
|
|
||||||
results.append(mapped_result)
|
|
||||||
|
|
||||||
return SearchResults(
|
|
||||||
page_info=PageInfo(),
|
|
||||||
results=results,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_search_result(
|
|
||||||
data: dict, translation_type: Literal["sub", "dub"] | None
|
|
||||||
) -> SearchResult | None:
|
|
||||||
if translation_type and data["dub"] != 1 if translation_type == "dub" else 0:
|
|
||||||
return None
|
|
||||||
return SearchResult(
|
|
||||||
id=str(data["id"]),
|
|
||||||
title=get_titles(data)[0] if get_titles(data) else "Unknown",
|
|
||||||
episodes=AnimeEpisodes(
|
|
||||||
sub=(
|
|
||||||
list(map(str, range(1, get_episodes_count(data) + 1)))
|
|
||||||
if data["dub"] == 0
|
|
||||||
else []
|
|
||||||
),
|
|
||||||
dub=(
|
|
||||||
list(map(str, range(1, get_episodes_count(data) + 1)))
|
|
||||||
if data["dub"] == 1
|
|
||||||
else []
|
|
||||||
),
|
|
||||||
),
|
|
||||||
other_titles=get_titles(data),
|
|
||||||
score=data["score"],
|
|
||||||
poster=data["imageurl"],
|
|
||||||
year=data["date"],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_anime_result(data: list, search_result: SearchResult) -> Anime:
|
|
||||||
return Anime(
|
|
||||||
id=search_result.id,
|
|
||||||
title=search_result.title,
|
|
||||||
episodes=AnimeEpisodes(
|
|
||||||
sub=[
|
|
||||||
episode["number"]
|
|
||||||
for episode in data
|
|
||||||
if len(search_result.episodes.sub) > 0
|
|
||||||
],
|
|
||||||
dub=[
|
|
||||||
episode["number"]
|
|
||||||
for episode in data
|
|
||||||
if len(search_result.episodes.dub) > 0
|
|
||||||
],
|
|
||||||
),
|
|
||||||
episodes_info=[
|
|
||||||
AnimeEpisodeInfo(
|
|
||||||
id=str(episode["id"]),
|
|
||||||
episode=episode["number"],
|
|
||||||
title=f"{search_result.title} - Ep {episode['number']}",
|
|
||||||
)
|
|
||||||
for episode in data
|
|
||||||
],
|
|
||||||
type=search_result.media_type,
|
|
||||||
poster=search_result.poster,
|
|
||||||
year=search_result.year,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def map_to_server(
|
|
||||||
episode: AnimeEpisodeInfo, info: dict, translation_type: Literal["sub", "dub"]
|
|
||||||
) -> Server:
|
|
||||||
return Server(
|
|
||||||
name="vixcloud",
|
|
||||||
links=[
|
|
||||||
EpisodeStream(
|
|
||||||
link=info["link"].replace(str(info["quality"]), quality),
|
|
||||||
title=info["name"],
|
|
||||||
quality=quality, # type: ignore
|
|
||||||
translation_type=MediaTranslationType(translation_type),
|
|
||||||
mp4=True,
|
|
||||||
)
|
|
||||||
for quality in sorted(
|
|
||||||
list(set(AVAILABLE_VIDEO_QUALITY + [str(info["quality"])])),
|
|
||||||
key=lambda x: int(x),
|
|
||||||
reverse=True,
|
|
||||||
)
|
|
||||||
if int(quality) <= info["quality"]
|
|
||||||
],
|
|
||||||
episode_title=episode.title,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_titles(data: dict) -> list[str]:
|
|
||||||
"""
|
|
||||||
Return the most appropriate title from the record.
|
|
||||||
"""
|
|
||||||
titles = []
|
|
||||||
if data.get("title_eng"):
|
|
||||||
titles.append(data["title_eng"])
|
|
||||||
if data.get("title"):
|
|
||||||
titles.append(data["title"])
|
|
||||||
if data.get("title_it"):
|
|
||||||
titles.append(data["title_it"])
|
|
||||||
return titles
|
|
||||||
|
|
||||||
|
|
||||||
def get_episodes_count(record: dict) -> int:
|
|
||||||
"""
|
|
||||||
Return the number of episodes from the record.
|
|
||||||
"""
|
|
||||||
if (count := record.get("real_episodes_count", 0)) > 0:
|
|
||||||
return count
|
|
||||||
return record.get("episodes_count", 0)
|
|
||||||
@@ -1,170 +0,0 @@
|
|||||||
import logging
|
|
||||||
from functools import lru_cache
|
|
||||||
|
|
||||||
from ...scraping.user_agents import UserAgentGenerator
|
|
||||||
from ..base import BaseAnimeProvider
|
|
||||||
from ..params import AnimeParams, EpisodeStreamsParams, SearchParams
|
|
||||||
from ..types import Anime, AnimeEpisodeInfo, SearchResult, SearchResults
|
|
||||||
from ..utils.debug import debug_provider
|
|
||||||
from .constants import (
|
|
||||||
ANIMEUNITY_BASE,
|
|
||||||
MAX_TIMEOUT,
|
|
||||||
REPLACEMENT_WORDS,
|
|
||||||
TOKEN_REGEX,
|
|
||||||
)
|
|
||||||
from .extractor import extract_server_info
|
|
||||||
from .mappers import (
|
|
||||||
map_to_anime_result,
|
|
||||||
map_to_search_result,
|
|
||||||
map_to_search_results,
|
|
||||||
map_to_server,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AnimeUnity(BaseAnimeProvider):
|
|
||||||
HEADERS = {
|
|
||||||
"User-Agent": UserAgentGenerator().random(),
|
|
||||||
}
|
|
||||||
_cache = dict[str, SearchResult]()
|
|
||||||
|
|
||||||
@lru_cache
|
|
||||||
def _get_token(self) -> None:
|
|
||||||
response = self.client.get(
|
|
||||||
ANIMEUNITY_BASE,
|
|
||||||
headers=self.HEADERS,
|
|
||||||
timeout=MAX_TIMEOUT,
|
|
||||||
follow_redirects=True,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
token_match = TOKEN_REGEX.search(response.text)
|
|
||||||
if token_match:
|
|
||||||
self.HEADERS["x-csrf-token"] = token_match.group(1)
|
|
||||||
self.client.cookies = {
|
|
||||||
"animeunity_session": response.cookies.get("animeunity_session") or ""
|
|
||||||
}
|
|
||||||
self.client.headers = self.HEADERS
|
|
||||||
|
|
||||||
@debug_provider
|
|
||||||
def search(self, params: SearchParams) -> SearchResults | None:
|
|
||||||
if not (res := self._search(params)):
|
|
||||||
return None
|
|
||||||
|
|
||||||
for result in res.results:
|
|
||||||
self._cache[result.id] = result
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
@lru_cache
|
|
||||||
def _search(self, params: SearchParams) -> SearchResults | None:
|
|
||||||
self._get_token()
|
|
||||||
# Replace words in query to
|
|
||||||
query = params.query
|
|
||||||
for old, new in REPLACEMENT_WORDS.items():
|
|
||||||
query = query.replace(old, new)
|
|
||||||
|
|
||||||
response = self.client.post(
|
|
||||||
url=f"{ANIMEUNITY_BASE}/livesearch",
|
|
||||||
data={"title": query},
|
|
||||||
timeout=MAX_TIMEOUT,
|
|
||||||
)
|
|
||||||
|
|
||||||
response.raise_for_status()
|
|
||||||
return map_to_search_results(
|
|
||||||
response.json().get("records", []), params.translation_type
|
|
||||||
)
|
|
||||||
|
|
||||||
@debug_provider
|
|
||||||
def get(self, params: AnimeParams) -> Anime | None:
|
|
||||||
return self._get_anime(params)
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _get_search_result(self, params: AnimeParams) -> SearchResult | None:
|
|
||||||
if cached := self._cache.get(params.id):
|
|
||||||
return cached
|
|
||||||
|
|
||||||
response = self.client.get(
|
|
||||||
url=f"{ANIMEUNITY_BASE}/info_api/{params.id}/",
|
|
||||||
timeout=MAX_TIMEOUT,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
|
|
||||||
if res := map_to_search_result(data, None):
|
|
||||||
self._cache[params.id] = res
|
|
||||||
return res
|
|
||||||
|
|
||||||
@lru_cache
|
|
||||||
def _get_anime(self, params: AnimeParams) -> Anime | None:
|
|
||||||
if (search_result := self._get_search_result(params)) is None:
|
|
||||||
logger.error(f"No search result found for ID {params.id}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Fetch episodes in chunks
|
|
||||||
data = []
|
|
||||||
start_range = 1
|
|
||||||
episode_count = max(
|
|
||||||
len(search_result.episodes.sub), len(search_result.episodes.dub)
|
|
||||||
)
|
|
||||||
while start_range <= episode_count:
|
|
||||||
end_range = min(start_range + 119, episode_count)
|
|
||||||
response = self.client.get(
|
|
||||||
url=f"{ANIMEUNITY_BASE}/info_api/{params.id}/1",
|
|
||||||
params={
|
|
||||||
"start_range": start_range,
|
|
||||||
"end_range": end_range,
|
|
||||||
},
|
|
||||||
timeout=MAX_TIMEOUT,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
data.extend(response.json().get("episodes", []))
|
|
||||||
start_range = end_range + 1
|
|
||||||
|
|
||||||
return map_to_anime_result(data, search_result)
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def _get_episode_info(
|
|
||||||
self, params: EpisodeStreamsParams
|
|
||||||
) -> AnimeEpisodeInfo | None:
|
|
||||||
anime_info = self._get_anime(
|
|
||||||
AnimeParams(id=params.anime_id, query=params.query)
|
|
||||||
)
|
|
||||||
if not anime_info:
|
|
||||||
logger.error(f"No anime info for {params.anime_id}")
|
|
||||||
return
|
|
||||||
if not anime_info.episodes_info:
|
|
||||||
logger.error(f"No episodes info for {params.anime_id}")
|
|
||||||
return
|
|
||||||
for episode in anime_info.episodes_info:
|
|
||||||
if episode.episode == params.episode:
|
|
||||||
return episode
|
|
||||||
|
|
||||||
    @debug_provider
    def episode_streams(self, params: EpisodeStreamsParams):
        """Yield Server objects carrying stream links for one episode.

        NOTE: this is a generator (it contains ``yield``), so the lookups,
        error logging and early returns only execute once iteration begins.
        """
        if not (episode := self._get_episode_info(params)):
            logger.error(
                f"Episode {params.episode} doesn't exist for anime {params.anime_id}"
            )
            return
        # Get the Server url
        response = self.client.get(
            url=f"{ANIMEUNITY_BASE}/embed-url/{episode.id}", timeout=MAX_TIMEOUT
        )
        response.raise_for_status()

        # Fetch the Server page
        # The embed-url endpoint returns the server page URL as plain text.
        video_response = self.client.get(url=response.text.strip(), timeout=MAX_TIMEOUT)
        video_response.raise_for_status()

        if not (info := extract_server_info(video_response.text, episode.title)):
            logger.error(f"Failed to extract video info for episode {episode.id}")
            return None

        yield map_to_server(episode, info, params.translation_type)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Ad-hoc manual harness: run this module directly to exercise the
    # provider end-to-end (search -> details -> streams) interactively.
    from ..utils.debug import test_anime_provider

    test_anime_provider(AnimeUnity)
@@ -1,39 +0,0 @@
|
|||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import TYPE_CHECKING, ClassVar, Dict
|
|
||||||
|
|
||||||
from .params import AnimeParams, EpisodeStreamsParams, SearchParams
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from collections.abc import Iterator
|
|
||||||
|
|
||||||
from httpx import Client
|
|
||||||
|
|
||||||
from .types import Anime, SearchResults, Server
|
|
||||||
|
|
||||||
|
|
||||||
class BaseAnimeProvider(ABC):
    """Abstract interface that every anime provider must implement."""

    # HTTP headers merged into the provider's client by the factory.
    HEADERS: ClassVar[Dict[str, str]]

    def __init_subclass__(cls, **kwargs):
        # Enforce the HEADERS contract at class-creation time rather than
        # failing later when the factory builds the HTTP client.
        # NOTE(review): the bare annotation above does NOT satisfy hasattr;
        # hasattr also accepts HEADERS inherited from a parent class.
        super().__init_subclass__(**kwargs)
        if not hasattr(cls, "HEADERS"):
            raise TypeError(
                "Subclasses of BaseAnimeProvider must define a 'HEADERS' class attribute."
            )

    def __init__(self, client: "Client") -> None:
        # Pre-configured httpx client used for all provider requests.
        self.client = client

    @abstractmethod
    def search(self, params: SearchParams) -> "SearchResults | None":
        """Search the provider's catalogue; returns None on failure."""
        pass

    @abstractmethod
    def get(self, params: AnimeParams) -> "Anime | None":
        """Fetch full details for a single anime; returns None on failure."""
        pass

    @abstractmethod
    def episode_streams(
        self, params: EpisodeStreamsParams
    ) -> "Iterator[Server] | None":
        """Yield servers with stream links for one episode; None on failure."""
        pass
@@ -1,46 +0,0 @@
|
|||||||
from dataclasses import dataclass
|
|
||||||
from typing import Literal, Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class SearchParams:
    """Parameters for searching anime."""

    query: str  # free-text search term

    # pagination and sorting
    current_page: int = 1
    page_limit: int = 20
    sort_by: str = "relevance"
    order: Literal["asc", "desc"] = "desc"

    # filters
    translation_type: Literal["sub", "dub"] = "sub"
    genre: Optional[str] = None
    year: Optional[int] = None
    status: Optional[str] = None
    allow_nsfw: bool = True  # include adult titles by default
    allow_unknown: bool = True  # include entries with unknown metadata
    country_of_origin: Optional[str] = None
|
|
||||||
@dataclass(frozen=True)
class EpisodeStreamsParams:
    """Parameters for fetching episode streams."""

    query: str  # original search query (some providers need it for lookups)
    anime_id: str  # provider-specific anime identifier
    episode: str  # episode identifier kept as a string for exact matching
    translation_type: Literal["sub", "dub"] = "sub"
    server: Optional[str] = None  # preferred server name, if any
    quality: Literal["1080", "720", "480", "360"] = "720"
    subtitles: bool = True
|
|
||||||
@dataclass(frozen=True)
class AnimeParams:
    """Parameters for fetching anime details."""

    id: str  # provider-specific anime identifier
    # HACK: for the sake of providers which require previous data
    query: str
@@ -1,72 +0,0 @@
|
|||||||
import importlib
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from httpx import Client
|
|
||||||
|
|
||||||
from .base import BaseAnimeProvider
|
|
||||||
from .types import ProviderName
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Maps provider key -> "module.ClassName" within that provider's package.
# NOTE(review): keys here should stay in sync with the ProviderName enum —
# confirm every key has a matching enum member.
PROVIDERS_AVAILABLE = {
    "allanime": "provider.AllAnime",
    "animepahe": "provider.AnimePahe",
    "hianime": "provider.HiAnime",
    "nyaa": "provider.Nyaa",
    "yugen": "provider.Yugen",
    "animeunity": "provider.AnimeUnity",
}
|
|
||||||
|
|
||||||
class AnimeProviderFactory:
    """Factory for creating anime provider instances."""

    @staticmethod
    def create(provider_name: ProviderName) -> BaseAnimeProvider:
        """
        Dynamically creates an instance of the specified anime provider.

        This method imports the necessary provider module, instantiates its main class,
        and injects a pre-configured HTTP client.

        Args:
            provider_name: The name of the provider to create (e.g., 'allanime').

        Returns:
            An instance of a class that inherits from BaseProvider.

        Raises:
            ValueError: If the provider_name is not supported.
            ImportError: If the provider module or class cannot be found.
        """
        from ....core.utils.networking import random_user_agent

        key = provider_name.value.lower()

        # BUGFIX: raise the documented ValueError for unsupported providers
        # instead of leaking a bare KeyError from the dict lookup.
        if key not in PROVIDERS_AVAILABLE:
            raise ValueError(
                f"Unsupported provider '{key}'. "
                f"Available providers: {', '.join(sorted(PROVIDERS_AVAILABLE))}"
            )

        # Correctly determine module and class name from the map
        import_path = PROVIDERS_AVAILABLE[key]
        module_name, class_name = import_path.split(".", 1)

        # Construct the full package path for dynamic import
        package_path = f"viu_media.libs.provider.anime.{key}"

        try:
            provider_module = importlib.import_module(f".{module_name}", package_path)
            provider_class = getattr(provider_module, class_name)
        except (ImportError, AttributeError) as e:
            logger.error(f"Failed to load provider '{key}': {e}")
            raise ImportError(
                f"Could not load provider '{key}'. "
                "Check the module path and class name in PROVIDERS_AVAILABLE."
            ) from e

        # Each provider class requires an httpx.Client, which we set up here.
        # The provider's own HEADERS take precedence over the random UA.
        client = Client(
            headers={"User-Agent": random_user_agent(), **provider_class.HEADERS}
        )

        return provider_class(client)
|
|
||||||
|
|
||||||
# Simple alias for ease of use, consistent with other factories in the codebase.
|
|
||||||
create_provider = AnimeProviderFactory.create
|
|
||||||
@@ -1,119 +0,0 @@
|
|||||||
from enum import Enum
|
|
||||||
from typing import List, Literal, Optional
|
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict
|
|
||||||
|
|
||||||
# from .allanime.types import Server as AllAnimeServer
|
|
||||||
# from .animepahe.types import Server as AnimePaheServer
|
|
||||||
|
|
||||||
|
|
||||||
# ENUMS
|
|
||||||
class ProviderName(Enum):
    """Supported anime providers; values are the lowercase registry keys."""

    ALLANIME = "allanime"
    ANIMEPAHE = "animepahe"
    ANIMEUNITY = "animeunity"
|
||||||
|
|
||||||
class ProviderServer(Enum):
    """Union of server names across all providers; TOP means 'best available'."""

    TOP = "TOP"

    # AllAnimeServer values
    SHAREPOINT = "sharepoint"
    DROPBOX = "dropbox"
    GOGOANIME = "gogoanime"
    WETRANSFER = "weTransfer"
    WIXMP = "wixmp"
    YT = "Yt"
    MP4_UPLOAD = "mp4-upload"

    # AnimePaheServer values
    KWIK = "kwik"

    # AnimeUnityServer values
    VIXCLOUD = "vixcloud"
|
|
||||||
|
|
||||||
class MediaTranslationType(Enum):
    """Audio/subtitle variant of a stream: subtitled, dubbed, or untranslated."""

    SUB = "sub"
    DUB = "dub"
    RAW = "raw"
|
|
||||||
|
|
||||||
# MODELS
|
|
||||||
class BaseAnimeProviderModel(BaseModel):
    """Base for all provider models; frozen makes instances immutable (and hashable)."""

    model_config = ConfigDict(frozen=True)
|
|
||||||
|
|
||||||
class PageInfo(BaseAnimeProviderModel):
    """Pagination metadata for a search response; all fields optional."""

    total: Optional[int] = None
    per_page: Optional[int] = None
    current_page: Optional[int] = None
|
|
||||||
|
|
||||||
class AnimeEpisodes(BaseAnimeProviderModel):
    """Episode identifiers grouped by translation type.

    Mutable defaults are safe here: pydantic deep-copies field defaults
    per instance.
    """

    sub: List[str]
    dub: List[str] = []
    raw: List[str] = []
|
|
||||||
|
|
||||||
class SearchResult(BaseAnimeProviderModel):
    """One entry in a provider's search results."""

    id: str  # provider-specific identifier
    title: str
    episodes: AnimeEpisodes
    other_titles: List[str] = []
    media_type: Optional[str] = None
    score: Optional[float] = None
    status: Optional[str] = None
    season: Optional[str] = None
    poster: Optional[str] = None  # poster image URL
    year: Optional[str] = None  # kept as a string as returned by providers
|
|
||||||
|
|
||||||
class SearchResults(BaseAnimeProviderModel):
    """A page of search results plus its pagination metadata."""

    page_info: PageInfo
    results: List[SearchResult]
|
|
||||||
|
|
||||||
class AnimeEpisodeInfo(BaseAnimeProviderModel):
    """Metadata for a single episode of an anime."""

    id: str  # provider-specific episode identifier
    episode: str  # episode number/identifier as a string
    session_id: Optional[str] = None  # presumably a provider session token — TODO confirm
    title: Optional[str] = None
    poster: Optional[str] = None
    duration: Optional[str] = None
|
|
||||||
|
|
||||||
class Anime(BaseAnimeProviderModel):
    """Full details of an anime, including optional per-episode metadata."""

    id: str
    title: str
    episodes: AnimeEpisodes
    type: Optional[str] = None
    episodes_info: List[AnimeEpisodeInfo] | None = None
    poster: Optional[str] = None
    year: Optional[str] = None
|
|
||||||
|
|
||||||
class EpisodeStream(BaseAnimeProviderModel):
    """A single playable stream link with its quality and format metadata."""

    # episode: str
    link: str  # direct stream URL
    title: Optional[str] = None
    quality: Literal["360", "480", "720", "1080"] = "720"
    translation_type: MediaTranslationType = MediaTranslationType.SUB
    format: Optional[str] = None
    hls: Optional[bool] = None  # True when the link is an HLS playlist
    mp4: Optional[bool] = None  # True when the link is a progressive mp4
    priority: Optional[int] = None  # provider-assigned ordering hint
|
|
||||||
|
|
||||||
class Subtitle(BaseAnimeProviderModel):
    """A subtitle track URL with an optional language label."""

    url: str
    language: Optional[str] = None
|
|
||||||
|
|
||||||
class Server(BaseAnimeProviderModel):
    """A streaming server: its links plus headers/subtitles needed to play them.

    Mutable defaults are safe: pydantic copies them per instance.
    """

    name: str
    links: List[EpisodeStream]
    episode_title: Optional[str] = None
    headers: dict[str, str] = dict()  # HTTP headers the player must send
    subtitles: List[Subtitle] = []
    audio: List[str] = []  # separate audio track URLs, if any
|
||||||
@@ -1,90 +0,0 @@
|
|||||||
import functools
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from typing import Type
|
|
||||||
|
|
||||||
from ..base import BaseAnimeProvider
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)


def debug_provider(provider_function):
    """Wrap a provider method so failures are logged instead of raised.

    Normal mode (VIU_DEBUG unset): exceptions are swallowed, logged with a
    full traceback, and the wrapper returns None — best-effort behavior so
    one broken provider does not crash the app.
    Debug mode (VIU_DEBUG set): exceptions propagate unchanged.
    """

    @functools.wraps(provider_function)
    def _provider_function_wrapper(self, *args, **kwargs):
        provider_name = self.__class__.__name__.upper()
        if os.environ.get("VIU_DEBUG"):
            # Debug mode: let exceptions propagate for full interactive tracebacks.
            return provider_function(self, *args, **kwargs)
        try:
            return provider_function(self, *args, **kwargs)
        except Exception as e:
            # BUGFIX: logger.exception records the traceback; logger.error with
            # just str(e) made failures nearly impossible to diagnose.
            logger.exception(f"[{provider_name}@{provider_function.__name__}]: {e}")

    return _provider_function_wrapper
|
|
||||||
|
|
||||||
def test_anime_provider(AnimeProvider: Type[BaseAnimeProvider]):
    """Interactive smoke-test harness for a provider implementation.

    Walks the full flow on the console: search -> pick result -> fetch
    details -> pick episode/server/quality -> hand the link to a local
    player (mpv, else xdg-open/open). Returns silently at the first step
    that yields nothing.
    """
    import shutil
    import subprocess

    from httpx import Client

    from .....core.constants import APP_ASCII_ART
    from .....core.utils.networking import random_user_agent
    from ..params import AnimeParams, EpisodeStreamsParams, SearchParams

    anime_provider = AnimeProvider(
        Client(headers={"User-Agent": random_user_agent(), **AnimeProvider.HEADERS})
    )
    print(APP_ASCII_ART.read_text(encoding="utf-8"))
    query = input("What anime would you like to stream: ")
    search_results = anime_provider.search(SearchParams(query=query))
    if not search_results:
        return
    for i, search_result in enumerate(search_results.results):
        print(f"{i + 1}: {search_result.title}")
    # NOTE: user input is not validated; a non-numeric or out-of-range pick raises.
    result = search_results.results[
        int(input(f"Select result (1-{len(search_results.results)}): ")) - 1
    ]
    anime = anime_provider.get(AnimeParams(id=result.id, query=query))

    if not anime:
        return
    # NOTE: an input other than dub/sub/raw raises AttributeError below.
    translation_type = input("Preferred Translation Type: [dub,sub,raw]: ")
    for episode in getattr(anime.episodes, translation_type):
        print(episode)
    episode_number = input("What episode do you wish to watch: ")
    episode_streams = anime_provider.episode_streams(
        EpisodeStreamsParams(
            query=query,
            anime_id=anime.id,
            episode=episode_number,
            translation_type=translation_type,  # type:ignore
        )
    )

    if not episode_streams:
        return
    # Materialize the generator so it can be indexed after listing.
    episode_streams = list(episode_streams)
    for i, stream in enumerate(episode_streams):
        print(f"{i + 1}: {stream.name}")
    stream = episode_streams[int(input("Select your preferred server: ")) - 1]
    for i, link in enumerate(stream.links):
        print(f"{i + 1}: {link.quality}")
    link = stream.links[int(input("Select your preferred quality: ")) - 1]
    # Prefer mpv, then the platform's generic opener (Linux, then macOS).
    if executable := shutil.which("mpv"):
        cmd = executable
    elif executable := shutil.which("xdg-open"):
        cmd = executable
    elif executable := shutil.which("open"):
        cmd = executable
    else:
        return

    print(
        "Now streaming: ",
        anime.title,
        "Episode: ",
        stream.episode_title if stream.episode_title else episode_number,
    )
    subprocess.run([cmd, link.link])
@@ -1,494 +0,0 @@
|
|||||||
# pyright: reportAttributeAccessIssue=false, reportPossiblyUnboundVariable=false
|
|
||||||
"""
|
|
||||||
HTML parsing utilities with optional lxml support.
|
|
||||||
|
|
||||||
This module provides comprehensive HTML parsing capabilities using either
|
|
||||||
Python's built-in html.parser or lxml for better performance when available.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# TODO: Review and optimize the HTML parsing logic for better performance and flexibility.
|
|
||||||
# Consider adding more utility functions for common HTML manipulation tasks.
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
from html.parser import HTMLParser as BaseHTMLParser
|
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Try to import lxml
|
|
||||||
HAS_LXML = False
|
|
||||||
try:
|
|
||||||
from lxml import etree, html as lxml_html
|
|
||||||
|
|
||||||
HAS_LXML = True
|
|
||||||
logger.debug("lxml is available and will be used for HTML parsing")
|
|
||||||
except ImportError:
|
|
||||||
logger.debug("lxml not available, falling back to html.parser")
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLParserConfig:
    """Configuration for HTML parser selection."""

    def __init__(self, use_lxml: Optional[bool] = None):
        """
        Initialize parser configuration.

        Args:
            use_lxml: Force use of lxml (True), html.parser (False), or auto-detect (None)
        """
        # Auto-detect when unspecified; otherwise honour the request only if
        # lxml is actually importable.
        self.use_lxml = HAS_LXML if use_lxml is None else (use_lxml and HAS_LXML)

        if use_lxml and not HAS_LXML:
            logger.warning(
                "lxml requested but not available, falling back to html.parser"
            )
|
|
||||||
|
|
||||||
class HTMLParser:
    """
    Comprehensive HTML parser with optional lxml support.

    Provides a unified interface for HTML parsing operations regardless
    of the underlying parser implementation.
    """

    def __init__(self, config: Optional[HTMLParserConfig] = None):
        """Initialize the HTML parser with configuration."""
        self.config = config or HTMLParserConfig()

    def parse(self, html_content: str) -> Union[Any, "ParsedHTML"]:
        """
        Parse HTML content and return a parsed tree.

        Args:
            html_content: Raw HTML string to parse

        Returns:
            Parsed HTML tree (lxml Element or custom ParsedHTML object)

        Note:
            Callers must branch on the parser type: the two return types do
            not share an API (lxml elements expose xpath(), ParsedHTML does not).
        """
        if self.config.use_lxml:
            return self._parse_with_lxml(html_content)
        else:
            return self._parse_with_builtin(html_content)

    def _parse_with_lxml(self, html_content: str) -> Any:
        """Parse HTML using lxml."""
        try:
            # Use lxml's HTML parser which is more lenient
            return lxml_html.fromstring(html_content)
        except Exception as e:
            # Degrade gracefully: a parse failure falls back to the builtin
            # parser, so this method can also return a ParsedHTML.
            logger.warning(f"lxml parsing failed: {e}, falling back to html.parser")
            return self._parse_with_builtin(html_content)

    def _parse_with_builtin(self, html_content: str) -> "ParsedHTML":
        """Parse HTML using Python's built-in parser."""
        parser = BuiltinHTMLParser()
        parser.feed(html_content)
        return ParsedHTML(parser.elements, html_content)
|
|
||||||
|
|
||||||
class BuiltinHTMLParser(BaseHTMLParser):
    """Enhanced HTML parser using Python's built-in capabilities.

    Builds a tree of dicts with keys: tag, attrs, text, children,
    start_pos (and end_pos once the closing tag is seen).
    """

    # Void elements (HTML spec) never receive a closing tag; leaving them on
    # the open-element stack would wrongly nest following siblings under them.
    _VOID_ELEMENTS = frozenset(
        {
            "area", "base", "br", "col", "embed", "hr", "img", "input",
            "link", "meta", "param", "source", "track", "wbr",
        }
    )

    def __init__(self):
        super().__init__()
        self.elements = []  # top-level element dicts
        self.current_element = None  # retained for compatibility; not used
        self.element_stack = []  # currently-open (unclosed) elements

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """Handle opening tags."""
        element = {
            "tag": tag,
            "attrs": dict(attrs),
            "text": "",
            "children": [],
            "start_pos": self.getpos(),
        }

        if self.element_stack:
            self.element_stack[-1]["children"].append(element)
        else:
            self.elements.append(element)

        # BUGFIX: void elements like <br> or <img> get no closing tag, so
        # they must not stay open awaiting one.
        if tag.lower() not in self._VOID_ELEMENTS:
            self.element_stack.append(element)

    def handle_endtag(self, tag: str):
        """Handle closing tags (ignores mismatched/stray closers)."""
        if self.element_stack and self.element_stack[-1]["tag"] == tag:
            element = self.element_stack.pop()
            element["end_pos"] = self.getpos()

    def handle_data(self, data: str):
        """Handle text content; accumulated on the innermost open element."""
        if self.element_stack:
            self.element_stack[-1]["text"] += data
|
||||||
|
|
||||||
class ParsedHTML:
    """Wrapper for parsed HTML using built-in parser."""

    def __init__(self, elements: List[Dict], raw_html: str):
        self.elements = elements
        self.raw_html = raw_html

    def _walk(self, nodes: List[Dict]):
        """Yield every element depth-first in document (pre-)order."""
        for node in nodes:
            yield node
            yield from self._walk(node["children"])

    def find_by_id(self, element_id: str) -> Optional[Dict]:
        """Find element by ID."""
        for node in self._walk(self.elements):
            if node["attrs"].get("id") == element_id:
                return node
        return None

    def find_by_class(self, class_name: str) -> List[Dict]:
        """Find elements by class name."""
        return [
            node
            for node in self._walk(self.elements)
            if class_name in node["attrs"].get("class", "").split()
        ]

    def find_by_tag(self, tag_name: str) -> List[Dict]:
        """Find elements by tag name (case-insensitive)."""
        wanted = tag_name.lower()
        return [
            node for node in self._walk(self.elements) if node["tag"].lower() == wanted
        ]
|
|
||||||
|
|
||||||
# Global parser instance
|
|
||||||
_default_parser = HTMLParser()
|
|
||||||
|
|
||||||
|
|
||||||
def extract_attributes(html_element: str) -> Dict[str, str]:
    """
    Extract attributes from an HTML element string.

    Args:
        html_element: HTML element as string (e.g., '<div class="test" id="main">')

    Returns:
        Dictionary of attribute name-value pairs

    Examples:
        >>> extract_attributes('<div class="test" id="main">')
        {'class': 'test', 'id': 'main'}
    """
    if not html_element:
        return {}

    # Quoted attributes first: name="value" or name='value'.
    quoted = re.findall(r'(\w+)=(["\'])([^"\']*?)\2', html_element)
    attributes = {name: value for name, _quote, value in quoted}

    # Unquoted attributes (name=value); quoted matches already found win.
    for name, value in re.findall(r"(\w+)=([^\s>]+)", html_element):
        attributes.setdefault(name, value)

    return attributes
|
|
||||||
|
|
||||||
def get_element_by_id(element_id: str, html_content: str) -> Optional[str]:
    """
    Get HTML element by ID.

    Args:
        element_id: The ID attribute value to search for
        html_content: HTML content to search in

    Returns:
        HTML string of the element or None if not found

    Examples:
        >>> html = '<div id="test">Content</div>'
        >>> get_element_by_id("test", html)
        '<div id="test">Content</div>'
    """
    parsed = _default_parser.parse(html_content)

    if _default_parser.config.use_lxml and HAS_LXML:
        try:
            # NOTE: element_id is interpolated into the XPath; an id containing
            # a double quote would break the expression.
            element = parsed.xpath(f'//*[@id="{element_id}"]')
            if element:
                return etree.tostring(element[0], encoding="unicode", method="html")
        except Exception as e:
            logger.warning(f"lxml XPath search failed: {e}")
            return None
    else:
        element = parsed.find_by_id(element_id)
        if element:
            # Reconstructed HTML — may differ in whitespace from the source.
            return _element_to_html(element, html_content)

    return None
|
|
||||||
|
|
||||||
def get_element_by_tag(tag_name: str, html_content: str) -> Optional[str]:
    """
    Get first HTML element by tag name.

    Args:
        tag_name: The tag name to search for
        html_content: HTML content to search in

    Returns:
        HTML string of the element or None if not found
    """
    parsed = _default_parser.parse(html_content)

    if _default_parser.config.use_lxml and HAS_LXML:
        try:
            # tag_name is interpolated into the XPath expression verbatim.
            elements = parsed.xpath(f"//{tag_name}")
            if elements:
                return etree.tostring(elements[0], encoding="unicode", method="html")
        except Exception as e:
            logger.warning(f"lxml XPath search failed: {e}")
            return None
    else:
        elements = parsed.find_by_tag(tag_name)
        if elements:
            # Reconstructed HTML — may differ in whitespace from the source.
            return _element_to_html(elements[0], html_content)

    return None
|
|
||||||
|
|
||||||
def get_element_by_class(class_name: str, html_content: str) -> Optional[str]:
    """
    Get first HTML element by class name.

    Args:
        class_name: The class name to search for
        html_content: HTML content to search in

    Returns:
        HTML string of the element or None if not found
    """
    parsed = _default_parser.parse(html_content)

    if _default_parser.config.use_lxml and HAS_LXML:
        try:
            # NOTE: contains() also matches substrings of longer class names
            # (e.g. "btn" matches class="btn-large").
            elements = parsed.xpath(f'//*[contains(@class, "{class_name}")]')
            if elements:
                return etree.tostring(elements[0], encoding="unicode", method="html")
        except Exception as e:
            logger.warning(f"lxml XPath search failed: {e}")
            return None
    else:
        # Builtin path matches whole class tokens only (see ParsedHTML).
        elements = parsed.find_by_class(class_name)
        if elements:
            return _element_to_html(elements[0], html_content)

    return None
|
|
||||||
|
|
||||||
def get_elements_by_tag(tag_name: str, html_content: str) -> List[str]:
    """
    Get all HTML elements by tag name.

    Args:
        tag_name: The tag name to search for
        html_content: HTML content to search in

    Returns:
        List of HTML strings for matching elements
    """
    parsed = _default_parser.parse(html_content)
    results = []

    if _default_parser.config.use_lxml and HAS_LXML:
        try:
            elements = parsed.xpath(f"//{tag_name}")
            for element in elements:
                results.append(
                    etree.tostring(element, encoding="unicode", method="html")
                )
        except Exception as e:
            # On XPath failure the partial results gathered so far are returned.
            logger.warning(f"lxml XPath search failed: {e}")
    else:
        elements = parsed.find_by_tag(tag_name)
        for element in elements:
            results.append(_element_to_html(element, html_content))

    return results
|
|
||||||
|
|
||||||
def get_elements_by_class(class_name: str, html_content: str) -> List[str]:
    """
    Get all HTML elements by class name.

    Args:
        class_name: The class name to search for
        html_content: HTML content to search in

    Returns:
        List of HTML strings for matching elements
    """
    parsed = _default_parser.parse(html_content)
    results = []

    if _default_parser.config.use_lxml and HAS_LXML:
        try:
            # NOTE: contains() also matches substrings of longer class names.
            elements = parsed.xpath(f'//*[contains(@class, "{class_name}")]')
            for element in elements:
                results.append(
                    etree.tostring(element, encoding="unicode", method="html")
                )
        except Exception as e:
            logger.warning(f"lxml XPath search failed: {e}")
    else:
        # Builtin path matches whole class tokens only (see ParsedHTML).
        elements = parsed.find_by_class(class_name)
        for element in elements:
            results.append(_element_to_html(element, html_content))

    return results
|
|
||||||
|
|
||||||
def get_elements_html_by_class(class_name: str, html_content: str) -> List[str]:
    """
    Get HTML strings of elements by class name.

    This is an alias for get_elements_by_class for yt-dlp compatibility.

    Args:
        class_name: The class name to search for
        html_content: HTML content to search in

    Returns:
        List of HTML strings for matching elements
    """
    return get_elements_by_class(class_name, html_content)
|
|
||||||
|
|
||||||
def get_element_text_and_html_by_tag(
    tag_name: str, html_content: str
) -> Tuple[Optional[str], Optional[str]]:
    """
    Get both text content and HTML of first element by tag name.

    Args:
        tag_name: The tag name to search for
        html_content: HTML content to search in

    Returns:
        Tuple of (text_content, html_string) or (None, None) if not found

    Examples:
        >>> html = '<script>alert("test");</script>'
        >>> get_element_text_and_html_by_tag("script", html)
        ('alert("test");', '<script>alert("test");</script>')
    """
    parsed = _default_parser.parse(html_content)

    if _default_parser.config.use_lxml and HAS_LXML:
        try:
            elements = parsed.xpath(f"//{tag_name}")
            if elements:
                element = elements[0]
                # text_content() gathers text from descendants too; .text is
                # only the node's own leading text.
                text = (
                    element.text_content()
                    if hasattr(element, "text_content")
                    else (element.text or "")
                )
                html_str = etree.tostring(element, encoding="unicode", method="html")
                return text, html_str
        except Exception as e:
            logger.warning(f"lxml XPath search failed: {e}")
            return None, None
    else:
        elements = parsed.find_by_tag(tag_name)
        if elements:
            element = elements[0]
            text = _extract_text_content(element)
            html_str = _element_to_html(element, html_content)
            return text, html_str

    return None, None
|
|
||||||
|
|
||||||
def _element_to_html(element: Dict, original_html: str) -> str:
|
|
||||||
"""
|
|
||||||
Convert parsed element back to HTML string.
|
|
||||||
|
|
||||||
This is a simplified implementation that reconstructs HTML from parsed data.
|
|
||||||
For production use, consider using lxml for better accuracy.
|
|
||||||
"""
|
|
||||||
if not element:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# Build opening tag
|
|
||||||
tag = element["tag"]
|
|
||||||
attrs = element.get("attrs", {})
|
|
||||||
attr_str = " ".join(f'{k}="{v}"' for k, v in attrs.items() if v is not None)
|
|
||||||
|
|
||||||
if attr_str:
|
|
||||||
opening_tag = f"<{tag} {attr_str}>"
|
|
||||||
else:
|
|
||||||
opening_tag = f"<{tag}>"
|
|
||||||
|
|
||||||
# Add text content
|
|
||||||
text = element.get("text", "")
|
|
||||||
|
|
||||||
# Add children
|
|
||||||
children_html = ""
|
|
||||||
for child in element.get("children", []):
|
|
||||||
children_html += _element_to_html(child, original_html)
|
|
||||||
|
|
||||||
# Build closing tag
|
|
||||||
closing_tag = f"</{tag}>"
|
|
||||||
|
|
||||||
return f"{opening_tag}{text}{children_html}{closing_tag}"
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_text_content(element: Dict) -> str:
|
|
||||||
"""Extract all text content from element and its children."""
|
|
||||||
text = element.get("text", "")
|
|
||||||
|
|
||||||
for child in element.get("children", []):
|
|
||||||
text += _extract_text_content(child)
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def configure_parser(use_lxml: Optional[bool] = None) -> None:
    """
    Configure the global HTML parser.

    Replaces the module-level ``_default_parser`` used by all the
    ``get_element*`` helpers; not thread-safe against concurrent callers.

    Args:
        use_lxml: Force use of lxml (True), html.parser (False), or auto-detect (None)
    """
    global _default_parser
    _default_parser = HTMLParser(HTMLParserConfig(use_lxml))
    logger.info(
        f"HTML parser configured: {'lxml' if _default_parser.config.use_lxml else 'html.parser'}"
    )
@@ -1,235 +0,0 @@
|
|||||||
"""
|
|
||||||
User agent utilities for web scraping.
|
|
||||||
|
|
||||||
Provides functionality to generate random user agent strings
|
|
||||||
to avoid detection and blocking by websites.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import random
|
|
||||||
from typing import List, Optional
|
|
||||||
|
|
||||||
|
|
||||||
class UserAgentGenerator:
    """
    Generator for realistic user agent strings.

    Provides a variety of common user agents from different browsers
    and operating systems to help avoid detection.

    Each instance owns a private ``random.Random`` so that seeding one
    generator never perturbs the process-wide ``random`` module state.
    """

    # Common user agents for different browsers and OS combinations
    USER_AGENTS = [
        # Chrome on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        # Chrome on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        # Chrome on Linux
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        # Firefox on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
        # Firefox on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:122.0) Gecko/20100101 Firefox/122.0",
        # Firefox on Linux
        "Mozilla/5.0 (X11; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0",
        # Safari on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Safari/605.1.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        # Edge on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
        # Mobile Chrome (Android)
        "Mozilla/5.0 (Linux; Android 14; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
        "Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
        # Mobile Safari (iOS)
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (iPad; CPU OS 17_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Mobile/15E148 Safari/604.1",
    ]

    # Browser-specific user agents for when you need a specific browser.
    # NOTE: these sublists are computed once at class-definition time and are
    # NOT refreshed when add_user_agent() appends to USER_AGENTS.
    CHROME_USER_AGENTS = [
        ua for ua in USER_AGENTS if "Chrome" in ua and "Edg" not in ua
    ]
    FIREFOX_USER_AGENTS = [ua for ua in USER_AGENTS if "Firefox" in ua]
    SAFARI_USER_AGENTS = [
        ua for ua in USER_AGENTS if "Safari" in ua and "Chrome" not in ua
    ]
    EDGE_USER_AGENTS = [ua for ua in USER_AGENTS if "Edg" in ua]

    # Platform-specific user agents
    WINDOWS_USER_AGENTS = [ua for ua in USER_AGENTS if "Windows NT" in ua]
    MACOS_USER_AGENTS = [ua for ua in USER_AGENTS if "Macintosh" in ua]
    LINUX_USER_AGENTS = [
        ua for ua in USER_AGENTS if "Linux" in ua and "Android" not in ua
    ]
    MOBILE_USER_AGENTS = [ua for ua in USER_AGENTS if "Mobile" in ua or "Android" in ua]

    def __init__(self, seed: Optional[int] = None):
        """
        Initialize the user agent generator.

        Args:
            seed: Random seed for reproducible results (optional)
        """
        # Fix: the previous implementation called random.seed(seed), which
        # reseeds the GLOBAL random module and silently changes the behavior
        # of every other random consumer in the process. A per-instance
        # Random preserves reproducibility without that side effect.
        self._rng = random.Random(seed)

    def random(self) -> str:
        """
        Get a random user agent string.

        Returns:
            Random user agent string
        """
        return self._rng.choice(self.USER_AGENTS)

    def random_browser(self, browser: str) -> str:
        """
        Get a random user agent for a specific browser.

        Args:
            browser: Browser name ('chrome', 'firefox', 'safari', 'edge')

        Returns:
            Random user agent string for the specified browser

        Raises:
            ValueError: If browser is not supported
        """
        browser = browser.lower()
        if browser == "chrome":
            return self._rng.choice(self.CHROME_USER_AGENTS)
        elif browser == "firefox":
            return self._rng.choice(self.FIREFOX_USER_AGENTS)
        elif browser == "safari":
            return self._rng.choice(self.SAFARI_USER_AGENTS)
        elif browser == "edge":
            return self._rng.choice(self.EDGE_USER_AGENTS)
        else:
            raise ValueError(f"Unsupported browser: {browser}")

    def random_platform(self, platform: str) -> str:
        """
        Get a random user agent for a specific platform.

        Args:
            platform: Platform name ('windows', 'macos', 'linux', 'mobile')

        Returns:
            Random user agent string for the specified platform

        Raises:
            ValueError: If platform is not supported
        """
        platform = platform.lower()
        if platform == "windows":
            return self._rng.choice(self.WINDOWS_USER_AGENTS)
        elif platform in ("macos", "mac"):
            return self._rng.choice(self.MACOS_USER_AGENTS)
        elif platform == "linux":
            return self._rng.choice(self.LINUX_USER_AGENTS)
        elif platform == "mobile":
            return self._rng.choice(self.MOBILE_USER_AGENTS)
        else:
            raise ValueError(f"Unsupported platform: {platform}")

    def add_user_agent(self, user_agent: str) -> None:
        """
        Add a custom user agent to the list (no-op if already present).

        NOTE: this appends to the class-level USER_AGENTS list, so the new
        agent is visible to all instances; the pre-computed browser/platform
        sublists are not updated.

        Args:
            user_agent: Custom user agent string to add
        """
        if user_agent not in self.USER_AGENTS:
            self.USER_AGENTS.append(user_agent)

    def get_all(self) -> List[str]:
        """
        Get all available user agent strings.

        Returns:
            List of all user agent strings (a copy; safe to mutate)
        """
        return self.USER_AGENTS.copy()
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level default generator backing the convenience functions below;
# replaced wholesale by set_user_agent_seed().
_default_generator = UserAgentGenerator()
|
|
||||||
|
|
||||||
|
|
||||||
def random_user_agent() -> str:
    """
    Return a random user agent string from the module's default generator.

    Returns:
        Random user agent string

    Examples:
        >>> ua = random_user_agent()
        >>> "Mozilla" in ua
        True
    """
    return _default_generator.random()
|
|
||||||
|
|
||||||
|
|
||||||
def random_user_agent_browser(browser: str) -> str:
    """
    Return a random user agent for a specific browser, using the default
    generator.

    Args:
        browser: Browser name ('chrome', 'firefox', 'safari', 'edge')

    Returns:
        Random user agent string for the specified browser
    """
    return _default_generator.random_browser(browser)
|
|
||||||
|
|
||||||
|
|
||||||
def random_user_agent_platform(platform: str) -> str:
    """
    Return a random user agent for a specific platform, using the default
    generator.

    Args:
        platform: Platform name ('windows', 'macos', 'linux', 'mobile')

    Returns:
        Random user agent string for the specified platform
    """
    return _default_generator.random_platform(platform)
|
|
||||||
|
|
||||||
|
|
||||||
def set_user_agent_seed(seed: int) -> None:
    """
    Seed the module-wide user agent generation.

    Builds a fresh ``UserAgentGenerator`` initialized with *seed* and
    installs it as the default used by the module-level helper functions.

    Args:
        seed: Random seed value
    """
    global _default_generator
    _default_generator = UserAgentGenerator(seed)
|
|
||||||
|
|
||||||
|
|
||||||
def add_custom_user_agent(user_agent: str) -> None:
    """
    Register a custom user agent string with the default generator.

    Args:
        user_agent: Custom user agent string to add
    """
    _default_generator.add_user_agent(user_agent)
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_user_agents() -> List[str]:
    """
    Return every user agent string known to the default generator.

    Returns:
        List of all user agent strings
    """
    return _default_generator.get_all()
|
|
||||||
@@ -1,272 +0,0 @@
|
|||||||
"""
|
|
||||||
Encoding and utility functions for web scraping.
|
|
||||||
|
|
||||||
Provides various encoding utilities including base-N encoding
|
|
||||||
that was previously sourced from yt-dlp.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import string
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
def encode_base_n(num: int, n: int, table: Optional[str] = None) -> str:
    """
    Encode a number in base-n representation.

    Args:
        num: The number to encode (may be negative)
        n: The base to use for encoding
        table: Custom character table (optional; defaults to 0-9 then a-z)

    Returns:
        String representation of the number in base-n

    Raises:
        ValueError: If ``n`` is not between 2 and ``len(table)``.

    Examples:
        >>> encode_base_n(255, 16)
        'ff'
        >>> encode_base_n(42, 36)
        '16'
    """
    if table is None:
        table = string.digits + string.ascii_lowercase

    if not 2 <= n <= len(table):
        raise ValueError(f"Base must be between 2 and {len(table)}")

    if num == 0:
        return table[0]

    sign = "-" if num < 0 else ""
    magnitude = abs(num)

    digits = []
    while magnitude:
        # divmod peels off the least-significant digit each pass.
        magnitude, remainder = divmod(magnitude, n)
        digits.append(table[remainder])

    return sign + "".join(reversed(digits))
|
|
||||||
|
|
||||||
|
|
||||||
def decode_base_n(encoded: str, n: int, table: Optional[str] = None) -> int:
    """
    Decode a base-n encoded string back to an integer.

    Args:
        encoded: The base-n encoded string
        n: The base used for encoding
        table: Custom character table (optional; defaults to 0-9 then a-z)

    Returns:
        The decoded integer

    Raises:
        ValueError: If the base is out of range or a character is not a
            valid digit for the given base/table.

    Examples:
        >>> decode_base_n('ff', 16)
        255
        >>> decode_base_n('16', 36)
        42
    """
    if table is None:
        table = string.digits + string.ascii_lowercase
        # The default table is lowercase-only; fold input case so that
        # e.g. 'FF' decodes the same as 'ff'.
        # Fix: the previous implementation lowercased unconditionally,
        # which broke any custom table containing uppercase characters.
        encoded = encoded.lower()

    if not 2 <= n <= len(table):
        raise ValueError(f"Base must be between 2 and {len(table)}")

    if not encoded:
        return 0

    is_negative = encoded.startswith("-")
    if is_negative:
        encoded = encoded[1:]

    result = 0
    # Walk from the least-significant digit so position i carries weight n**i.
    for i, char in enumerate(reversed(encoded)):
        if char not in table:
            raise ValueError(f"Invalid character '{char}' for base {n}")

        digit_value = table.index(char)
        if digit_value >= n:
            raise ValueError(f"Invalid digit '{char}' for base {n}")

        result += digit_value * (n**i)

    return -result if is_negative else result
|
|
||||||
|
|
||||||
|
|
||||||
def url_encode(text: str, safe: str = "") -> str:
    """
    URL encode a string (percent-encoding).

    Args:
        text: Text to encode
        safe: Characters that should not be encoded

    Returns:
        URL encoded string
    """
    from urllib.parse import quote

    return quote(text, safe=safe)
|
|
||||||
|
|
||||||
|
|
||||||
def url_decode(text: str) -> str:
    """
    Decode a percent-encoded URL string.

    Args:
        text: URL encoded text to decode

    Returns:
        Decoded string
    """
    from urllib.parse import unquote

    return unquote(text)
|
|
||||||
|
|
||||||
|
|
||||||
def html_unescape(text: str) -> str:
    """
    Unescape HTML entities in text.

    Args:
        text: Text containing HTML entities

    Returns:
        Text with HTML entities unescaped

    Examples:
        >>> html_unescape('&quot;Hello&quot; &amp; &lt;World&gt;')
        '"Hello" & <World>'
    """
    from html import unescape

    return unescape(text)
|
|
||||||
|
|
||||||
|
|
||||||
def strip_tags(html_content: str) -> str:
    """
    Remove all HTML tags from content, leaving only text.

    Args:
        html_content: HTML content with tags

    Returns:
        Plain text with tags removed

    Examples:
        >>> strip_tags('<p>Hello <b>world</b>!</p>')
        'Hello world!'
    """
    import re

    tag_pattern = re.compile(r"<[^>]+>")
    return tag_pattern.sub("", html_content)
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_whitespace(text: str) -> str:
    """
    Normalize whitespace in text by collapsing internal runs of whitespace
    to single spaces and removing leading/trailing whitespace.

    Args:
        text: Text to normalize

    Returns:
        Text with normalized whitespace

    Examples:
        >>> normalize_whitespace('  Hello   world  \\n\\t  ')
        'Hello world'
    """
    # str.split() with no argument splits on any whitespace run and drops
    # leading/trailing whitespace, so join-on-space is equivalent to the
    # regex form while avoiding a regex entirely.
    return " ".join(text.split())
|
|
||||||
|
|
||||||
|
|
||||||
def extract_domain(url: str) -> str:
    """
    Extract the domain (network location) portion of a URL.

    Args:
        url: Full URL

    Returns:
        Domain portion of the URL

    Examples:
        >>> extract_domain('https://example.com/path?query=1')
        'example.com'
    """
    from urllib.parse import urlparse

    return urlparse(url).netloc
|
|
||||||
|
|
||||||
|
|
||||||
def join_url(base: str, path: str) -> str:
    """
    Join a base URL with a path, following RFC 3986 resolution rules.

    Args:
        base: Base URL
        path: Path to join

    Returns:
        Combined URL

    Examples:
        >>> join_url('https://example.com', '/api/data')
        'https://example.com/api/data'
    """
    from urllib.parse import urljoin

    return urljoin(base, path)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_query_string(query: str) -> dict:
    """
    Parse a query string into a dictionary of parameter lists.

    Args:
        query: Query string (with or without leading '?')

    Returns:
        Dictionary mapping each parameter name to a list of its values

    Examples:
        >>> parse_query_string('?name=John&age=30')
        {'name': ['John'], 'age': ['30']}
    """
    import urllib.parse

    # Drop a single leading '?' so callers may pass either form.
    stripped = query[1:] if query.startswith("?") else query
    return urllib.parse.parse_qs(stripped)
|
|
||||||
|
|
||||||
|
|
||||||
def build_query_string(params: dict) -> str:
    """
    Build a URL-encoded query string from a dictionary of parameters.

    Scalar values are stringified; list/tuple values produce one key=value
    pair per element.

    Args:
        params: Dictionary of parameters

    Returns:
        URL-encoded query string

    Examples:
        >>> build_query_string({'name': 'John', 'age': 30})
        'name=John&age=30'
    """
    import urllib.parse

    # Normalize every value to a sequence so urlencode(doseq=True) emits
    # repeated keys for multi-valued parameters.
    prepared = {
        key: value if isinstance(value, (list, tuple)) else [str(value)]
        for key, value in params.items()
    }
    return urllib.parse.urlencode(prepared, doseq=True)
|
|
||||||
Reference in New Issue
Block a user