Files
FastAnime/app/libs/animdl/animdl_data_helper.py
2024-08-05 09:46:54 +03:00

169 lines
5.3 KiB
Python

import re
import json
from fuzzywuzzy import fuzz
from .extras import Logger
from .animdl_types import AnimdlAnimeUrlAndTitle,AnimdlData,AnimdlAnimeEpisode,AnimdlEpisodeStream
# Links on this host currently don't work, so streams pointing at them are
# filtered out. The dots are escaped so that they only match a literal "."
# in the host name instead of any character.
broken_link_pattern = r"https://tools\.fast4speed\.rsvp/\w*"
def path_parser(path: str) -> str:
    """Strips the characters that are unsafe in a path from a string

    Args:
        path (str): a path literal

    Returns:
        str: the same text with every unsafe character removed, so it can
        be used as a valid path
    """
    # every character listed here is deleted; nothing is substituted
    unsafe_characters = ':/\\"\'<>|?.*'
    deletion_table = str.maketrans("", "", unsafe_characters)
    return path.translate(deletion_table)
def string_contains_only_spaces(input_string: str) -> bool:
    """Tells whether a string holds nothing but whitespace

    Args:
        input_string (str): any string

    Returns:
        bool: True when every character is whitespace (an empty string
        also gives True), False as soon as one character is not
    """
    if not input_string:
        # str.isspace() is False for "", but an empty string has no
        # non-space character, so it counts as "only spaces" here
        return True
    return input_string.isspace()
def anime_title_percentage_match(
    possible_user_requested_anime_title: str, title: str
) -> int:
    """Scores how closely a search result title matches the wanted title

    The score is whatever ``fuzz.ratio`` gives for the two titles; it is
    also written to the log.

    Args:
        possible_user_requested_anime_title (str): an Animdl search result title
        title (str): the anime title the user wants

    Returns:
        int: the percentage match
    """
    percentage_ratio = fuzz.ratio(title, possible_user_requested_anime_title)
    log_message = f"Animdl Api Fuzzy: Percentage match of {possible_user_requested_anime_title} against {title}: {percentage_ratio}%"
    Logger.info(log_message)
    return percentage_ratio
def filter_broken_streams(
    streams: list[AnimdlEpisodeStream],
) -> list[AnimdlEpisodeStream]:
    """Removes the streams whose url points at a known broken link

    Args:
        streams (list[AnimdlEpisodeStream]): streams that each hold a
            "stream_url" entry

    Returns:
        list[AnimdlEpisodeStream]: the streams whose "stream_url" does not
        match ``broken_link_pattern``, in their original order
    """
    return [
        stream
        for stream in streams
        if re.match(broken_link_pattern, stream["stream_url"]) is None
    ]
def filter_streams_by_quality(
    anime_episode_streams: list[AnimdlEpisodeStream], quality: str | int, strict=False
) -> AnimdlEpisodeStream:
    """Picks the episode stream that corresponds to the requested quality

    Args:
        anime_episode_streams (list[AnimdlEpisodeStream]): the streams to
            choose from
        quality (str | int): "best", "worst" or a specific quality; a
            specific quality is compared with each stream's "quality" entry
            after converting both to strings
        strict: currently unused; when no stream has the requested quality
            the best stream is always returned

    Returns:
        AnimdlEpisodeStream: the chosen stream
    """

    def _quality_of(stream_):
        # a missing or falsy quality ranks as 0
        return stream_.get("quality") or 0

    if quality == "best":
        return max(anime_episode_streams, key=_quality_of)
    if quality == "worst":
        return min(anime_episode_streams, key=_quality_of)

    # a specific quality was requested: return the first stream that has it
    requested_quality = str(quality)
    for candidate in anime_episode_streams:
        if str(candidate.get("quality")) == requested_quality:
            return candidate

    # nothing matched, so fall back to the best stream
    Logger.info("Animdl Api: Not strict so defaulting to best")
    return max(anime_episode_streams, key=_quality_of)
# TODO: add typing to return dict
def parse_stream_urls_data(raw_stream_urls_data: str) -> list[AnimdlAnimeEpisode]:
    """Turns the raw stream urls output into a list of episodes

    Every line of the stripped input is decoded as json and wrapped in an
    ``AnimdlAnimeEpisode``. If anything goes wrong on any line the error is
    logged and an empty list is returned instead of a partial result.

    Args:
        raw_stream_urls_data (str): text holding one json document per line

    Returns:
        list[AnimdlAnimeEpisode]: the parsed episodes, or an empty list on
        failure
    """
    episodes = []
    try:
        for episode in raw_stream_urls_data.strip().split("\n"):
            episode_data = json.loads(episode.strip())
            episodes.append(AnimdlAnimeEpisode(episode_data))
    except Exception as e:
        Logger.error(f"Animdl Api Parser {e}")
        return []
    return episodes
def search_output_parser(raw_data: str) -> list[AnimdlAnimeUrlAndTitle]:
    """Parses the received raw animdl search data and makes it easier to use

    The first three lines of the output are dropped. Every remaining
    non-blank line is split on " / " into a title and a url; when the url
    part is blank, the whole following line is used as the url and is then
    skipped. Lines that do not fit this layout are ignored.

    Args:
        raw_data (str): valid animdl data

    Returns:
        list[AnimdlAnimeUrlAndTitle]: the parsed (anime title, url) pairs,
        with each title lower-cased and its leading numbering removed
    """
    # compiled once here instead of on every line
    numbering_pattern = re.compile(r"^\d*\.\s*")
    # special case for one piece since allanime labels it as 1p instead of onepiece
    one_piece_pattern = re.compile(r"1p", re.IGNORECASE)

    # get each line of data and ignore the leading ones that contain unwanted data
    lines = raw_data.split("\n")[3:]
    parsed_data = []
    skip_next = False

    for i, line in enumerate(lines):
        # this line was already used as the url of the previous result
        if skip_next:
            skip_next = False
            continue

        # there is no data or it is just spaces, so ignore it
        if not line or string_contains_only_spaces(line):
            continue

        parts = line.split(" / ")
        if len(parts) < 2:
            # no " / " separator, so this is not a "title / url" line
            continue

        # remove the numbering from the search result
        anime_title = numbering_pattern.sub("", parts[0]).lower()
        if one_piece_pattern.match(anime_title):
            anime_title = "one piece"

        url = parts[1]
        if string_contains_only_spaces(url):
            # the url sits on the following line; without one the result
            # is incomplete and is dropped
            if i + 1 >= len(lines):
                continue
            url = lines[i + 1]
            skip_next = True

        parsed_data.append(AnimdlAnimeUrlAndTitle(anime_title, url))

    return parsed_data  # anime title,url