v0.0.2
second version of sticker downloader
This commit is contained in:
		
							parent
							
								
									b248582193
								
							
						
					
					
						commit
						36773a85be
					
				
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -1,2 +1,6 @@
 | 
				
			|||||||
/config
 | 
					/config
 | 
				
			||||||
/env
 | 
					/env
 | 
				
			||||||
 | 
					/config.yaml
 | 
				
			||||||
 | 
					/download
 | 
				
			||||||
 | 
					/bot.log
 | 
				
			||||||
 | 
					/session*
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										212
									
								
								bot.py
									
									
									
									
									
								
							
							
						
						
									
										212
									
								
								bot.py
									
									
									
									
									
								
							@ -5,86 +5,146 @@ import logging
 | 
				
			|||||||
from telethon import functions, types, errors
 | 
					from telethon import functions, types, errors
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Route every record (DEBUG and above) to bot.log in the working directory.
					logging.basicConfig(
					    filename="bot.log",
					    level=logging.DEBUG,
					)
					# Module-wide logger used by the free functions in this file.
					log = logging.getLogger("gentoobot")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fetch_dialogs(dialogs, sources):
					    """Yield the dialogs that match one of the configured sources.

					    Args:
					        dialogs: iterable of dialog objects; each must expose ``entity``
					            (with an ``id``) and ``stringify()``.
					        sources: iterable of mappings with an ``"id"`` and a ``"type"``
					            key.  A type of ``"Channel"`` or ``"User"`` additionally
					            requires the entity to be an instance of ``types.Channel`` /
					            ``types.User``; any other type value matches on id alone.

					    Yields:
					        The dialog, once for every source entry it matches.
					    """
					    for dialog in dialogs:
					        log.debug(dialog)
					        entity = dialog.entity
					        for src in sources:
					            if entity.id != src["id"]:
					                continue
					            # Not every entity kind carries a ``title`` (users do not), so
					            # read it defensively instead of crashing on the log call.
					            log.debug('dialog %s %s matches by id',
					                      entity.id, getattr(entity, "title", None))
					            if src["type"] == "Channel" and not isinstance(entity, types.Channel):
					                log.debug('dialog %s is not a channel', entity.id)
					                continue
					            if src["type"] == "User" and not isinstance(entity, types.User):
					                # Log *before* skipping; the message used to sit after
					                # ``continue`` where it could never be reached.
					                log.debug('dialog %s is not a user', entity.id)
					                continue
					            log.info(dialog.stringify())
					            yield dialog
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def iter_sources(client, sources,):
					    """Scan every message of the matching dialogs and yield sticker finds.

					    Args:
					        client: object providing ``iter_dialogs()`` and
					            ``iter_messages(dialog, limit=None)``.
					        sources: source descriptions, handed to ``fetch_dialogs`` unchanged.

					    Yields:
					        For each document attribute that has a ``stickerset``: the
					        message's document, followed by the stickerset itself when it is a
					        ``types.InputStickerSetID``.

					    Any exception raised while inspecting a message is logged and the
					    scan continues with the next message.
					    """
					    for dialog in fetch_dialogs(client.iter_dialogs(), sources):
					        for message in client.iter_messages(dialog, limit=None):
					            try:
					                # ``media`` / ``document`` may exist but be None, which a
					                # plain hasattr() check cannot tell apart from real content.
					                media = getattr(message, "media", None)
					                if media is None:
					                    log.debug("message %i has no media", message.id)
					                    continue
					                document = getattr(media, "document", None)
					                if document is None:
					                    log.debug("message %i has no documents", message.id)
					                    continue
					                for attr in document.attributes:
					                    if not hasattr(attr, "stickerset"):
					                        continue
					                    log.debug("document %s is a sticker", document.id)
					                    yield document
					                    if isinstance(attr.stickerset, types.InputStickerSetID):
					                        log.debug("document %s belongs to stickerset", document.id)
					                        yield attr.stickerset
					            except Exception as e:
					                log.error("somethin wrong happened during checking message: %s", message.stringify(), exc_info=e)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class StickerDownloader:
					    """Download sticker documents and whole sticker packs to disk.

					    Files are stored as
					    ``<dl_root>/stickers/<pack id or "inline">/<document id>.<extension>``.

					    The ``client`` passed in must provide ``download_file(document, path)``
					    and be callable with a request object (used for
					    ``GetStickerSetRequest``).
					    """

					    # MIME type of a document -> file extension used on disk.  Unknown
					    # MIME types map to an empty extension (the name then ends in ".").
					    mimes = {
					        "image/webp": "webp",
					        "application/x-tgsticker": "tgs",
					        "video/webm": "webm",
					    }
					    _log = logging.getLogger("gentoobot.downloader")

					    def __init__(self, client, dl_root):
					        """Create the download directories and empty bookkeeping sets.

					        Args:
					            client: the client used for downloads and pack requests.
					            dl_root: directory (str or path-like) under which the
					                ``stickers`` folder is created.
					        """
					        self.downloads_root = Path(str(dl_root))
					        self.sticker_downloads = self.downloads_root / "stickers"
					        # parents=True also creates downloads_root (and anything above
					        # it), so a nested dl_root no longer fails.
					        self.sticker_downloads.mkdir(parents=True, exist_ok=True)
					        # Ids already handled during this run, to avoid repeated work.
					        self.seen_stickers = set()
					        self.seen_packs = set()
					        self.client = client

					    @classmethod
					    def get_sticker_metadata(cls, document):
					        """Collect id, date and, when present, pack id and alt text.

					        Returns:
					            dict with ``"id"`` and ``"date"``; ``"pack"`` is added when a
					            ``DocumentAttributeSticker`` references an
					            ``InputStickerSetID``; ``"alt"`` is added when such an
					            attribute has an ``alt`` field.
					        """
					        meta = {
					            "id": document.id,
					            "date": document.date,
					        }
					        for attr in document.attributes:
					            if not isinstance(attr, types.DocumentAttributeSticker):
					                continue
					            if isinstance(attr.stickerset, types.InputStickerSetID):
					                cls._log.debug("document %s is a normal sticker", document.id)
					                meta["pack"] = attr.stickerset.id
					            if hasattr(attr, 'alt'):
					                cls._log.debug('sticker has alt text %s', attr.alt)
					                meta["alt"] = attr.alt
					        return meta

					    def download(self, document):
					        """Download one sticker document unless it is already present.

					        A file counts as present when it exists with exactly
					        ``document.size`` bytes.  Failures are logged, never raised, so
					        one bad sticker does not stop a long run.
					        """
					        if document.id in self.seen_stickers:
					            return
					        try:
					            meta = self.get_sticker_metadata(document)
					            to_dir = self.sticker_downloads / str(meta.get("pack", "inline"))
					            to_dir.mkdir(parents=True, exist_ok=True)
					            to_file = to_dir / (str(document.id) + "." + self.mimes.get(document.mime_type, ""))
					            if not to_file.exists() or to_file.stat().st_size != document.size:
					                self._log.info("downloading %s to %s", document.id, str(to_file))
					                self.client.download_file(document, to_file)
					            else:
					                self._log.info("document %s already downloaded at %s", document.id, str(to_file))
					            # Remember the id in both cases so a sticker that is already
					            # on disk is not stat()-ed again every time it is seen.
					            self.seen_stickers.add(document.id)
					        except Exception as e:
					            self._log.critical("oops: ", exc_info=e)

					    def download_pack(self, inputpack):
					        """Fetch a whole sticker pack: write its metadata, then each sticker.

					        The pack description is stored as JSON in ``meta.txt`` inside the
					        pack directory.  Packs that no longer exist are logged and
					        remembered so they are not requested again during this run.
					        """
					        if inputpack.id in self.seen_packs:
					            return
					        try:
					            req = functions.messages.GetStickerSetRequest(stickerset=inputpack, hash=0)
					            stickers = self.client(req)
					            sticker_dir = self.sticker_downloads / str(inputpack.id)
					            sticker_dir.mkdir(parents=True, exist_ok=True)
					            with open(str(sticker_dir / "meta.txt"), "w", encoding="utf-8") as meta:
					                meta.write(stickers.to_json())
					            self._log.info("preparing to download whole stickerset %s", inputpack.id)
					            for doc in stickers.documents:
					                self.download(doc)
					            self.seen_packs.add(inputpack.id)
					        except errors.rpcerrorlist.StickersetInvalidError:
					            self._log.warning("sadly, stickerset %s no longer exists", inputpack.id)
					            # Asking again would only fail again; skip it from now on.
					            self.seen_packs.add(inputpack.id)
 | 
				
			||||||
        except Exception as e:
 | 
					
 | 
				
			||||||
            log.error("somethin wrong happened during checking message: %s", msg.stringify(), exc_info=e)
 | 
					def load_config(path):
					    """Parse the YAML file at *path* and return the loaded object.

					    Args:
					        path: location of the configuration file (str or path-like).

					    Returns:
					        Whatever ``yaml.safe_load`` produces for the file's content.
					    """
					    log.debug("opening %s", repr(path))
					    # Decode explicitly as UTF-8 instead of relying on the locale default.
					    with open(str(path), encoding="utf-8") as cfgstream:
					        cfg = yaml.safe_load(cfgstream)
					    # NOTE(review): this writes the complete configuration to the log;
					    # callers read app ids and hashes from it, so those end up in the
					    # log file as well - confirm that is intended.
					    log.debug(cfg)
					    return cfg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def make_client(nth):
					    """Build a ``TelegramClient`` for entry *nth* of ``apps`` in config.yaml.

					    The session name is ``"session"`` followed by *nth* and the app id.
					    The client is returned as constructed; this function does not start it.
					    """
					    app = load_config("config.yaml")["apps"][nth]
					    app_id, app_hash = app["id"], app['hash']
					    log.debug("client id %s hash %s", app_id, app_hash)
					    session_name = "session" + str(nth) + str(app_id)
					    return TelegramClient(session_name, app_id, app_hash)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main():
					    """Run the downloader: connect, scan the configured sources, fetch stickers.

					    Reads ``config.yaml``, starts a client with the first configured app,
					    then downloads every document yielded by ``iter_sources`` and every
					    sticker pack it refers to.  Ctrl-C stops the scan; the client is
					    disconnected however the scan ends.
					    """
					    cfg = load_config("config.yaml")
					    app_id = cfg["apps"][0]["id"]
					    app_hash = cfg["apps"][0]['hash']
					    log.debug("client id %s hash %s", app_id, app_hash)
					    client = TelegramClient("session0" + str(app_id), app_id, app_hash)
					    client.start()
					    try:
					        log.debug(client.get_me().stringify())
					        dl = StickerDownloader(client, "./download")
					        for found in iter_sources(client, cfg["sources"]):
					            if isinstance(found, types.Document):
					                dl.download(found)
					            elif isinstance(found, types.InputStickerSetID):
					                dl.download_pack(found)
					    except KeyboardInterrupt:
					        # Logger.warn is a deprecated alias; warning() is the supported name.
					        log.warning("exiting: interrupted by keyboard")
					    finally:
					        # Disconnect on every exit path, not only after a clean run or Ctrl-C.
					        client.disconnect()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -92,18 +152,4 @@ def process_archive(archive, stickerset_seen=set()):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    log.debug("opening %s", repr("config/bot.yaml"))
 | 
					    main()
 | 
				
			||||||
    with open("config/bot.yaml") as cfgstream:
 | 
					 | 
				
			||||||
        cfg = yaml.safe_load(cfgstream)
 | 
					 | 
				
			||||||
        log.debug(cfg)
 | 
					 | 
				
			||||||
        app = cfg["apps"][0]
 | 
					 | 
				
			||||||
    log.debug("starting client with id %s and hash %s", app['id'], app['hash'])
 | 
					 | 
				
			||||||
    client = TelegramClient('env/gentoo_session', app['id'], app['hash'])
 | 
					 | 
				
			||||||
    client.start()
 | 
					 | 
				
			||||||
    log.debug(client.get_me().stringify())
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        for sticker_archive in fetch_dialogs(client):
 | 
					 | 
				
			||||||
            process_archive(sticker_archive)
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        pass
 | 
					 | 
				
			||||||
    client.disconnect()
 | 
					 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user