tg_gentoo_bot/bot.py
Aleksey Chubukov 36773a85be v0.0.2
second version of sticker downloader
2022-12-28 15:10:28 +03:00

156 lines
6.1 KiB
Python

#!/usr/bin/env python3
import yaml
from telethon import TelegramClient, events, sync
import logging
from telethon import functions, types, errors
from pathlib import Path
# Route every record of level DEBUG and above into the file "bot.log".
logging.basicConfig(level=logging.DEBUG, filename="bot.log")
# Logger shared by the module-level functions of this file.
log = logging.getLogger("gentoobot")
def fetch_dialogs(dialogs, sources):
    """Yield the dialogs that match one of the configured sources.

    A dialog matches a source entry when ``dialog.entity.id`` equals
    ``src["id"]`` and, for the source types ``"Channel"`` and ``"User"``,
    the entity is an instance of ``types.Channel`` / ``types.User``
    respectively. Any other ``src["type"]`` value matches on the id alone.

    Args:
        dialogs: iterable of dialog objects exposing ``entity`` and
            ``stringify()``.
        sources: collection of mappings with the keys ``"id"`` and
            ``"type"``. It is walked once per dialog, so it must be
            re-iterable (a list, not a one-shot iterator).

    Yields:
        The matching dialog, once for every source entry it matches.
    """
    for dialog in dialogs:
        log.debug(dialog)
        entity = dialog.entity
        for src in sources:
            if entity.id != src["id"]:
                continue
            # Read the title defensively: an entity without a ``title``
            # attribute (e.g. a user) must not abort the scan.
            log.debug('dialog %s %s matches by id',
                      entity.id, getattr(entity, "title", None))
            if src["type"] == "Channel" and not isinstance(entity, types.Channel):
                log.debug('dialog %s is not a channel', entity.id)
                continue
            if src["type"] == "User" and not isinstance(entity, types.User):
                # Log first, then skip; with the order reversed the message
                # could never be emitted.
                log.debug('dialog %s is not a user', entity.id)
                continue
            log.info(dialog.stringify())
            yield dialog
def iter_sources(client, sources,):
    """Walk all messages of the configured sources and yield sticker finds.

    For every dialog selected by ``fetch_dialogs`` the full message history
    is iterated (``limit=None``). For each document attribute that carries a
    ``stickerset`` field the generator yields the document itself and, when
    that field is a ``types.InputStickerSetID``, the sticker set reference
    right after it.

    Args:
        client: object providing ``iter_dialogs()`` and
            ``iter_messages(dialog, limit=None)``.
        sources: source descriptions, passed through to ``fetch_dialogs``.

    Yields:
        Documents and ``types.InputStickerSetID`` objects, interleaved.
    """
    for dialog in fetch_dialogs(client.iter_dialogs(), sources):
        for message in client.iter_messages(dialog, limit=None):
            try:
                media = getattr(message, "media", None)
                if media is None:
                    log.debug("message %i has no media", message.id)
                    continue
                # The document may be missing, None, or a placeholder without
                # attributes; treat all of these as "nothing to download"
                # instead of letting them surface as errors below.
                document = getattr(media, "document", None)
                attributes = getattr(document, "attributes", None)
                if attributes is None:
                    log.debug("message %i has no documents", message.id)
                    continue
                for attribute in attributes:
                    if not hasattr(attribute, "stickerset"):
                        continue
                    log.debug("document %s is a sticker", document.id)
                    yield document
                    if isinstance(attribute.stickerset, types.InputStickerSetID):
                        log.debug("document %s belongs to stickerset", document.id)
                        yield attribute.stickerset
            except Exception as e:
                # Best effort: one malformed message must not stop the scan.
                log.error("somethin wrong happened during checking message: %s", message.stringify(), exc_info=e)
class StickerDownloader:
    """Store sticker documents and whole sticker sets below a download root.

    Files are written to ``<dl_root>/stickers/<pack id>/<document id>.<ext>``.
    Documents for which no pack id can be determined go to the ``inline``
    directory. Ids handled during the lifetime of the instance are remembered
    in ``seen_stickers`` / ``seen_packs`` so they are processed only once.
    """

    # Maps a document MIME type to the file extension used on disk.
    mimes = {
        "image/webp": "webp",
        "application/x-tgsticker": "tgs",
        "video/webm": "webm",
    }
    _log = logging.getLogger("gentoobot.downloader")

    def __init__(self, client, dl_root):
        """Prepare the directory layout and the bookkeeping sets.

        Args:
            client: object used for ``download_file(document, path)`` and for
                invoking requests by calling it (``client(request)``).
            dl_root: download root; converted with ``str()`` and wrapped in a
                ``Path``. Missing parent directories are created.
        """
        self.downloads_root = Path(str(dl_root))
        self.sticker_downloads = self.downloads_root / "stickers"
        # parents=True creates downloads_root (and anything above it) as well,
        # so a single call covers the whole layout.
        self.sticker_downloads.mkdir(parents=True, exist_ok=True)
        self.seen_stickers = set()
        self.seen_packs = set()
        self.client = client

    @classmethod
    def get_sticker_metadata(cls, document):
        """Collect id, date and, when present, pack id and alt text.

        Args:
            document: object exposing ``id``, ``date`` and ``attributes``.

        Returns:
            dict with the keys ``"id"`` and ``"date"``; ``"pack"`` is added
            when a ``types.DocumentAttributeSticker`` references a
            ``types.InputStickerSetID``, and ``"alt"`` when such an attribute
            has an ``alt`` field.
        """
        meta = {
            "id": document.id,
            "date": document.date,
        }
        for attribute in document.attributes:
            if not isinstance(attribute, types.DocumentAttributeSticker):
                continue
            if isinstance(attribute.stickerset, types.InputStickerSetID):
                cls._log.debug("document %s is a normal sticker", document.id)
                meta["pack"] = attribute.stickerset.id
            if hasattr(attribute, 'alt'):
                cls._log.debug('sticker has alt text %s', attribute.alt)
                meta["alt"] = attribute.alt
        return meta

    def download(self, document):
        """Download *document* unless a complete copy is already on disk.

        A file counts as complete when it exists and its size equals
        ``document.size``. An unknown MIME type yields an empty extension,
        i.e. a file name ending in a dot. Any exception is logged and
        swallowed so that one broken sticker does not stop a bulk run; in
        that case the id is not marked as seen and may be retried later.
        """
        if document.id in self.seen_stickers:
            return
        try:
            meta = self.get_sticker_metadata(document)
            to_dir = self.sticker_downloads / str(meta.get("pack", "inline"))
            to_dir.mkdir(parents=True, exist_ok=True)
            extension = self.mimes.get(document.mime_type, "")
            to_file = to_dir / (str(document.id) + "." + extension)
            if to_file.exists() and to_file.stat().st_size == document.size:
                self._log.info("document %s already downloaded at %s", document.id, str(to_file))
            else:
                self._log.info("downloading %s to %s", document.id, str(to_file))
                self.client.download_file(document, to_file)
            # Remember the id in both cases, so a file that is already
            # present is not stat()-ed and logged again on every sighting.
            self.seen_stickers.add(document.id)
        except Exception as e:
            self._log.critical("oops: ", exc_info=e)

    def download_pack(self, inputpack):
        """Fetch the sticker set *inputpack* and download all its documents.

        The set description is written as JSON to ``meta.txt`` inside the
        pack directory before the documents are downloaded. A set that no
        longer exists is reported with a warning. Other request errors
        propagate to the caller.
        """
        if inputpack.id in self.seen_packs:
            return
        try:
            req = functions.messages.GetStickerSetRequest(stickerset=inputpack, hash=0)
            stickers = self.client(req)
            sticker_dir = self.sticker_downloads / str(inputpack.id)
            sticker_dir.mkdir(parents=True, exist_ok=True)
            with open(str(sticker_dir / "meta.txt"), "w", encoding="utf-8") as meta:
                meta.write(stickers.to_json())
            self._log.info("preparing to download whole stickerset %s", inputpack.id)
            for doc in stickers.documents:
                self.download(doc)
            self.seen_packs.add(inputpack.id)
        except errors.rpcerrorlist.StickersetInvalidError:
            self._log.warning("sadly, stickerset %s no longer exists", inputpack.id)
            # Remember the dead pack too; otherwise the same failing request
            # is repeated for every sticker that references it.
            self.seen_packs.add(inputpack.id)
def load_config(path):
    """Parse the YAML file at *path* and return its content.

    Args:
        path: location of the configuration file; converted with ``str()``
            before it is opened.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.
    """
    log.debug("opening %s", repr(path))
    # Binary mode hands encoding detection to the YAML parser instead of
    # decoding with the platform's locale encoding.
    with open(str(path), "rb") as cfgstream:
        cfg = yaml.safe_load(cfgstream)
    # Other code in this file reads API hashes from this config, so only its
    # top-level layout is logged, never the values.
    if isinstance(cfg, dict):
        log.debug("loaded config with sections: %s", sorted(map(str, cfg)))
    else:
        log.debug("loaded config of type %s", type(cfg).__name__)
    return cfg
def make_client(nth):
    """Create a ``TelegramClient`` for the *nth* entry of ``apps`` in config.yaml.

    Args:
        nth: index into the ``apps`` list of the configuration.

    Returns:
        A ``TelegramClient`` whose session name is ``"session"`` followed by
        the index and the application id. The client is not started here.
    """
    cfg = load_config("config.yaml")
    app = cfg["apps"][nth]
    app_id = app["id"]
    app_hash = app["hash"]
    # The hash is a credential; keep it out of the log file.
    log.debug("client id %s", app_id)
    return TelegramClient("session" + str(nth) + str(app_id), app_id, app_hash)
def main():
    """Download every sticker and sticker set found in the configured sources.

    Reads ``config.yaml``, starts a client for the first configured app,
    scans the sources with ``iter_sources`` and hands each find to a
    ``StickerDownloader`` rooted at ``./download``. Ctrl-C ends the run
    with a warning in the log; the client is disconnected in every case.
    """
    cfg = load_config("config.yaml")
    app = cfg["apps"][0]
    app_id = app["id"]
    app_hash = app["hash"]
    # The hash is a credential; keep it out of the log file.
    log.debug("client id %s", app_id)
    client = TelegramClient("session0" + str(app_id), app_id, app_hash)
    client.start()
    try:
        log.debug(client.get_me().stringify())
        dl = StickerDownloader(client, "./download")
        for found in iter_sources(client, cfg["sources"]):
            if isinstance(found, types.Document):
                dl.download(found)
            elif isinstance(found, types.InputStickerSetID):
                dl.download_pack(found)
    except KeyboardInterrupt:
        # ``warn`` is a deprecated alias; ``warning`` is the supported name.
        log.warning("exiting: interrupted by keyboard")
    finally:
        # Disconnect even when an unexpected error aborts the run.
        client.disconnect()
####################
# Script entry point: run the downloader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()