v0.0.2
second version of sticker downloader
This commit is contained in:
		
							parent
							
								
									b248582193
								
							
						
					
					
						commit
						36773a85be
					
				
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,2 +1,6 @@ | ||||
| /config | ||||
| /env | ||||
| /config.yaml | ||||
| /download | ||||
| /bot.log | ||||
| /session* | ||||
|  | ||||
							
								
								
									
										224
									
								
								bot.py
									
									
									
									
									
								
							
							
						
						
									
										224
									
								
								bot.py
									
									
									
									
									
								
							| @ -5,86 +5,146 @@ import logging | ||||
| from telethon import functions, types, errors | ||||
| from pathlib import Path | ||||
| 
 | ||||
# Telegram document MIME type -> file extension used for the saved sticker.
mimes = {
    "image/webp": "webp",
    "application/x-tgsticker": "tgs",
    "video/webm": "webm",
}

# Working directories, relative to the current working directory.
# Parents are created before their children because mkdir() is called
# without parents=True.
w = Path(".")
wn = w / "env"            # runtime state (log file lives here)
wn.mkdir(exist_ok=True)
wd = wn / "download"      # one sub-directory per sticker set
wd.mkdir(exist_ok=True)
wdi = wd / "inline"       # stickers that reference no sticker set
wdi.mkdir(exist_ok=True)

# logging.basicConfig() does nothing once the root logger has a handler, so
# the second call that used to follow this one (filename="bot.log") never took
# effect; it is dropped here and the effective log target is unchanged.
logging.basicConfig(filename=str(wn / "bot.log"), level=logging.DEBUG)
log = logging.getLogger("gentoobot")
dllog = logging.getLogger("gentoobot.download")
| 
 | ||||
def download_sticker(client, document):
    """Save one sticker document under the module download directory.

    The target is ``wd / <set directory> / <document id>.<extension>``, where
    the set directory comes from ``get_sticker_setid`` and the extension from
    the ``mimes`` table (empty when the MIME type is not listed).  Documents
    that are not stickers are ignored.  A file that already exists with the
    same size as ``document.size`` is not downloaded again.

    :param client: object providing ``download_file(document, path)``.
    :param document: document exposing ``id``, ``mime_type``, ``size`` and
        ``attributes``.
    """
    folder_name, _ = get_sticker_setid(document)
    if folder_name is None:
        return

    target_dir = wd / folder_name
    target_dir.mkdir(exist_ok=True)
    extension = mimes.get(document.mime_type, "")
    target = target_dir / "{}.{}".format(document.id, extension)

    # A size match is the only completeness check performed.
    if target.exists() and target.stat().st_size == document.size:
        dllog.info("document %s already downloaded at %s", document.id, str(target))
        return

    dllog.info("downloading %s to %s", document.id, str(target))
    client.download_file(document, target)
| 
 | ||||
def get_sticker_setid(document):
    """Classify a document by the sticker set its attributes reference.

    :param document: object with ``id`` and an iterable ``attributes``.
    :returns: a ``(directory_name, stickerset)`` pair, always a 2-tuple so
        callers can unpack it unconditionally:

        * ``(None, None)`` when no attribute carries a stickerset, or when
          the stickerset is of a kind this function does not handle;
        * ``(str(stickerset.id), stickerset)`` for ``InputStickerSetID``;
        * ``("inline", stickerset)`` for ``InputStickerSetEmpty``.
    """
    stickerset = None
    # When several attributes carry a stickerset the last one wins.
    for a in document.attributes:
        if hasattr(a, "stickerset"):
            stickerset = a.stickerset
    if stickerset is None:
        dllog.debug("document %s is not a sticker", document.id)
        return None, None
    if isinstance(stickerset, types.InputStickerSetID):
        log.debug("document %s is a normal sticker", document.id)
        return str(stickerset.id), stickerset
    if isinstance(stickerset, types.InputStickerSetEmpty):
        dllog.debug("document %s is an inline sticker", document.id)
        return "inline", stickerset
    # Previously the function fell off the end here and returned a bare None,
    # which made every caller fail with TypeError while unpacking.
    dllog.debug(
        "document %s references an unsupported stickerset kind %s",
        document.id,
        type(stickerset).__name__,
    )
    return None, None
| 
 | ||||
def fetch_dialogs(dialogs, sources):
    """Yield the dialogs that match one of the configured sources.

    A dialog matches a source when ``dialog.entity.id == src["id"]`` and, for
    the source types ``"Channel"`` and ``"User"``, the entity is an instance
    of the corresponding Telethon type.  Sources with any other ``"type"``
    match on id alone.  A dialog is yielded once per matching source.

    :param dialogs: iterable of dialog objects exposing ``entity`` and
        ``stringify()``.
    :param sources: iterable of mappings with the keys ``"id"`` and ``"type"``.
    """
    expected_types = {"Channel": types.Channel, "User": types.User}
    for dialog in dialogs:
        log.debug(dialog)
        e = dialog.entity
        for src in sources:
            if e.id != src["id"]:
                continue
            # Not every entity type has a ``title`` (users do not), so read it
            # defensively instead of raising AttributeError.
            log.debug('dialog %s %s matches by id', e.id, getattr(e, "title", None))
            expected = expected_types.get(src["type"])
            if expected is not None and not isinstance(e, expected):
                # Log before skipping; the "user" message used to sit after
                # the ``continue`` and was never emitted.
                log.debug('dialog %s is not a %s', e.id, src["type"].lower())
                continue
            log.info(dialog.stringify())
            yield dialog
| 
 | ||||
def process_archive(archive, stickerset_seen=set()):
    """Download every sticker found in the messages of one dialog.

    For each message carrying a document, the sticker itself is downloaded;
    when it belongs to a regular sticker set that has not been handled yet,
    the whole set is fetched and every document in it is downloaded too.

    NOTE(review): this function uses a module-level ``client`` that is not
    defined in this block — confirm the caller sets it before calling.

    :param archive: dialog/entity whose messages are scanned.
    :param stickerset_seen: ids of sticker sets already fetched.  The default
        is a single set object shared by every call that omits the argument,
        so sets are remembered across calls — presumably intentional; verify.
    """
    # The original iterated over the global ``sticker_archive`` and ignored
    # the ``archive`` parameter altogether.
    for msg in client.iter_messages(archive, limit=None):
        log.debug(msg)
        if not hasattr(msg, "media"):
            log.debug("message %i has no media", msg.id)
            continue
        if not hasattr(msg.media, "document"):
            log.debug("message %i has no documents", msg.id)
            continue
        try:
            document = msg.media.document
            dldir, setid = get_sticker_setid(document)
            if dldir is None:
                continue
            download_sticker(client, document)
            if dldir == "inline" or setid.id in stickerset_seen:
                continue

            log.info("preparing to download whole stickerset %s as %s", setid, dldir)
            stickerset_seen.add(setid.id)
            try:
                request = functions.messages.GetStickerSetRequest(stickerset=setid, hash=0)
                pack = client(request)
            except errors.rpcerrorlist.StickersetInvalidError:
                log.warning("sadly, stickerset %s no longer exists", setid.id)
                continue
            for doc in pack.documents:
                try:
                    download_sticker(client, doc)
                except Exception as e:
                    # The old format string had a "%s" with no argument, so
                    # the placeholder was logged literally.
                    log.critical("oops: failed to download document %s", doc.id, exc_info=e)
        except Exception as e:
            log.error("somethin wrong happened during checking message: %s", msg.stringify(), exc_info=e)
def iter_sources(client, sources,):
    """Yield sticker documents and sticker-set references from the sources.

    Walks every dialog selected by ``fetch_dialogs`` and every message in it.
    For each document attribute that has a ``stickerset`` field the document
    is yielded, followed by the stickerset itself when it is an
    ``InputStickerSetID``.  Consumers tell the two apart by type.

    :param client: object providing ``iter_dialogs()`` and
        ``iter_messages(dialog, limit=None)``.
    :param sources: source descriptions forwarded to ``fetch_dialogs``.
    """
    for dialog in fetch_dialogs(client.iter_dialogs(), sources):
        for message in client.iter_messages(dialog, limit=None):
            try:
                # ``media`` and ``media.document`` may be present but None;
                # treating None like "missing" avoids an AttributeError that
                # was previously logged as an error with a full message dump.
                media = getattr(message, "media", None)
                if media is None:
                    log.debug("message %i has no media", message.id)
                    continue
                document = getattr(media, "document", None)
                if document is None:
                    log.debug("message %i has no documents", message.id)
                    continue
                for a in document.attributes:
                    if not hasattr(a, "stickerset"):
                        continue
                    log.debug("document %s is a sticker", document.id)
                    yield document
                    if isinstance(a.stickerset, types.InputStickerSetID):
                        log.debug("document %s belongs to stickerset", document.id)
                        yield a.stickerset
            except Exception as e:
                # Best effort: one bad message must not stop the whole scan.
                log.error("somethin wrong happened during checking message: %s", message.stringify(), exc_info=e)
| 
 | ||||
| 
 | ||||
class StickerDownloader:
    """Download stickers and whole sticker packs into a directory tree.

    Layout: ``<dl_root>/stickers/<pack id or "inline">/<document id>.<ext>``,
    plus ``<dl_root>/stickers/<pack id>/meta.txt`` holding the pack's JSON
    description.  Documents and packs already handled by this instance are
    remembered in ``seen_stickers`` / ``seen_packs`` and skipped.
    """

    # Telegram document MIME type -> file extension.
    mimes = {
        "image/webp": "webp",
        "application/x-tgsticker": "tgs",
        "video/webm": "webm",
    }
    _log = logging.getLogger("gentoobot.downloader")

    def __init__(self, client, dl_root):
        """Create the download directories and remember the client.

        :param client: object providing ``download_file(document, path)`` and
            being callable with a request object.
        :param dl_root: root directory for downloads (anything ``str()`` can
            turn into a path); created together with missing parents.
        """
        self.downloads_root = Path(str(dl_root))
        self.downloads_root.mkdir(parents=True, exist_ok=True)
        self.sticker_downloads = self.downloads_root / "stickers"
        self.sticker_downloads.mkdir(exist_ok=True)
        self.seen_stickers = set()
        self.seen_packs = set()
        self.client = client

    @classmethod
    def get_sticker_metadata(cls, document):
        """Collect basic metadata of a sticker document.

        :returns: dict with ``"id"`` and ``"date"``; ``"pack"`` is added when
            a sticker attribute references an ``InputStickerSetID`` and
            ``"alt"`` when a sticker attribute has an ``alt`` field.
        """
        meta = {
            "id": document.id,
            "date": document.date,
        }
        for a in document.attributes:
            if not isinstance(a, types.DocumentAttributeSticker):
                continue
            if isinstance(a.stickerset, types.InputStickerSetID):
                cls._log.debug("document %s is a normal sticker", document.id)
                meta["pack"] = a.stickerset.id
            if hasattr(a, 'alt'):
                cls._log.debug('sticker has alt text %s', a.alt)
                meta["alt"] = a.alt
        return meta

    def download(self, document):
        """Download one sticker document unless it is already present.

        A file that exists with the same size as ``document.size`` counts as
        complete.  Any exception is logged and swallowed so that one bad
        document does not abort a long-running scan.
        """
        if document.id in self.seen_stickers:
            return
        try:
            meta = self.get_sticker_metadata(document)
            to_dir = self.sticker_downloads / str(meta.get("pack", "inline"))
            to_dir.mkdir(exist_ok=True)
            to_file = to_dir / (str(document.id) + "." + self.mimes.get(document.mime_type, ""))
            if not to_file.exists() or to_file.stat().st_size != document.size:
                self._log.info("downloading %s to %s", document.id, str(to_file))
                self.client.download_file(document, to_file)
            else:
                self._log.info("document %s already downloaded at %s", document.id, str(to_file))
            # Remember the document in both cases; previously files already
            # on disk were re-checked every time they were encountered.
            self.seen_stickers.add(document.id)
        except Exception as e:
            self._log.critical("oops: ", exc_info=e)

    def download_pack(self, inputpack):
        """Fetch a sticker pack, store its metadata and download its stickers.

        :param inputpack: sticker-set reference exposing ``id``; passed as
            ``stickerset`` to ``GetStickerSetRequest``.
        """
        if inputpack.id in self.seen_packs:
            return
        try:
            req = functions.messages.GetStickerSetRequest(stickerset=inputpack, hash=0)
            stickers = self.client(req)
            sticker_dir = self.sticker_downloads / str(inputpack.id)
            sticker_dir.mkdir(exist_ok=True)
            # Explicit encoding so the JSON dump does not depend on the locale.
            with open(str(sticker_dir / "meta.txt"), "w", encoding="utf-8") as meta:
                meta.write(stickers.to_json())
            self._log.info("preparing to download whole stickerset %s", inputpack.id)
            for doc in stickers.documents:
                self.download(doc)
            self.seen_packs.add(inputpack.id)
        except errors.rpcerrorlist.StickersetInvalidError:
            self._log.warning("sadly, stickerset %s no longer exists", inputpack.id)
            # Remember dead packs too, otherwise every sticker that still
            # references one triggers another failing request.
            self.seen_packs.add(inputpack.id)
| 
 | ||||
def load_config(path):
    """Read a YAML file and return its parsed content.

    The path and the parsed result are both written to the debug log.

    :param path: location of the YAML file; converted with ``str()``.
    :returns: whatever ``yaml.safe_load`` produces for the file.
    """
    log.debug("opening %s", repr(path))
    with open(str(path)) as stream:
        parsed = yaml.safe_load(stream)
    log.debug(parsed)
    return parsed
| 
 | ||||
def make_client(nth):
    """Build a ``TelegramClient`` for the ``nth`` app listed in config.yaml.

    The session name is ``"session"`` followed by ``nth`` and the app id.

    :param nth: index into the ``apps`` list of the configuration.
    :returns: the constructed (not yet started) client.
    """
    app = load_config("config.yaml")["apps"][nth]
    app_id, app_hash = app["id"], app['hash']
    log.debug("client id %s hash %s", app_id, app_hash)
    session_name = "session{}{}".format(nth, app_id)
    return TelegramClient(session_name, app_id, app_hash)
| 
 | ||||
def main():
    """Run the downloader: connect, scan the configured sources, disconnect.

    Reads ``config.yaml``, starts a client for the first configured app and
    feeds everything ``iter_sources`` yields into a ``StickerDownloader``
    rooted at ``./download``: documents are downloaded individually,
    sticker-set references are downloaded as whole packs.  Ctrl-C stops the
    scan gracefully.
    """
    cfg = load_config("config.yaml")
    app_id = cfg["apps"][0]["id"]
    app_hash = cfg["apps"][0]['hash']
    log.debug("client id %s hash %s", app_id, app_hash)
    client = TelegramClient("session0" + str(app_id), app_id, app_hash)
    client.start()
    try:
        log.debug(client.get_me().stringify())
        dl = StickerDownloader(client, "./download")
        for found in iter_sources(client, cfg["sources"]):
            if isinstance(found, types.Document):
                dl.download(found)
            elif isinstance(found, types.InputStickerSetID):
                dl.download_pack(found)
    except KeyboardInterrupt:
        # Logger.warn is a deprecated alias; use warning().
        log.warning("exiting: interrupted by keyboard")
    finally:
        # Disconnect on every exit path, not only after a clean run or Ctrl-C.
        client.disconnect()
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -92,18 +152,4 @@ def process_archive(archive, stickerset_seen=set()): | ||||
| 
 | ||||
| 
 | ||||
# Script entry point.  All start-up work lives in main(); the inline start-up
# code that used to precede this call opened a second client and called
# fetch_dialogs() with its old single-argument signature.
if __name__ == "__main__":
    main()
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user