Source code for sharkey_crawler.main

#   ---------------------------------------------------------------------------------
#   Copyright (c) Hexafuchs. All rights reserved.
#   Licensed under the MIT License. See LICENSE in project root for information.
#   ---------------------------------------------------------------------------------
"""This provides a sharkey accessor instance."""

from __future__ import annotations

from typing import Annotated

import requests
from annotated_types import Interval

from .types import Post, SharkeyId
from .convert import dict_keys_to_snake_case

__all__ = ["SharkeyServer"]


class SharkeyServer:
    """
    Local representation of a Sharkey server; exposes server API endpoints and parses the
    returned data. If you require more endpoints, feel free to open a pull request or discussion.
    """

    def __init__(self, base_url: str):
        """
        :param base_url: base URL of the Sharkey server; if no scheme is given, https is assumed
        :returns: new Sharkey proxy instance
        """
        self.base_url = base_url.rstrip("/")
        if not self.base_url.startswith(("http://", "https://")):
            self.base_url = f"https://{self.base_url}"
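
    # A minimal sketch of the normalization above (the hostname is a placeholder):
    #
    #     server = SharkeyServer("shonk.example/")
    #     assert server.base_url == "https://shonk.example"
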
    # noinspection PyTypeHints
    def user_notes(
        self,
        user_id: SharkeyId,
        with_channel_notes: bool = False,
        with_renotes: bool = True,
        with_files: bool = False,
        with_replies: bool = False,
        limit: Annotated[int, Interval(ge=1, le=100)] = 10,
        allow_partial: bool = False,
        since_date: int | None = None,
        until_date: int | None = None,
        since_id: SharkeyId | None = None,
        until_id: SharkeyId | None = None,
        timeout: int | float | None = 300,
    ) -> list[Post]:
        """
        Returns the latest posts of a user.

        **WARNING: Because this endpoint is not documented, the argument descriptions below are
        educated guesses; I can only spend so much time reading other people's code. Please open
        an issue if I got something wrong. If you want to contribute, have a look at the Sharkey
        source yourself at https://activitypub.software/TransFem-org/Sharkey**

        :param user_id: id of the user you want to crawl
        :param with_channel_notes: include notes posted in channels
        :param with_renotes: include boosts (boosts that quote something are always included)
        :param with_files: include posts with files
        :param with_replies: include replies to other users
        :param limit: maximum number of posts, between 1 and 100
        :param allow_partial: read only from Redis, do not fall back to the database to fill the limit
        :param since_date: get posts from this date onwards (inclusive), expressed as milliseconds
            since the epoch; do not combine with any other `since_` or `until_` argument
        :param until_date: get posts up to this date (inclusive), expressed as milliseconds since
            the epoch; do not combine with any other `since_` or `until_` argument
        :param since_id: get posts from this id onwards (including this id); do not combine with
            any other `since_` or `until_` argument
        :param until_id: get posts up to this id (including this id); do not combine with any
            other `since_` or `until_` argument
        :param timeout: timeout of the request in seconds
        :returns: list of posts
        """
        payload = {
            "userId": user_id,
            "withChannelNotes": with_channel_notes,
            "withRenotes": with_renotes,
            "withFiles": with_files,
            "withReplies": with_replies,
            "limit": limit,
            "allowPartial": allow_partial,
        }
        # Only send pagination arguments that were explicitly given; comparing against None
        # keeps valid falsy values such as a since_date of 0 intact.
        if since_date is not None:
            payload["sinceDate"] = since_date
        if until_date is not None:
            payload["untilDate"] = until_date
        if since_id is not None:
            payload["sinceId"] = since_id
        if until_id is not None:
            payload["untilId"] = until_id

        response = requests.post(self.base_url + "/api/users/notes", json=payload, timeout=timeout)
        response.raise_for_status()  # fail with an HTTP error instead of a confusing JSON parse error

        return [Post.model_validate(dict_keys_to_snake_case(post)) for post in response.json()]
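
# A minimal usage sketch (not part of the module; the instance URL and user id are
# placeholders, and `user_notes` performs a real network request). Assumes `Post`
# exposes the note's `id` attribute:
#
#     server = SharkeyServer("shonk.example")
#     posts = server.user_notes("9abc123def", limit=100, with_replies=True)
#
#     # Fetch an older page via `until_id`; per the docstring, the boundary post
#     # may be included again, so deduplicate by id when paging:
#     if posts:
#         older = server.user_notes("9abc123def", limit=100, until_id=posts[-1].id)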