commit 8d6005eec137ea6c934d39180ec928479c56e081 Author: Aleksey Chubukov Date: Tue Sep 27 20:53:53 2022 +0300 first iteration of sticker downloader diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..66ae6d3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/config +/env diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..97a1824 --- /dev/null +++ b/Pipfile @@ -0,0 +1,16 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +pyyaml = "*" +telethon = "*" + +[dev-packages] + +[requires] +python_version = "3.9" + +[scripts] +bot = "python3 ./bot.py" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..5592b11 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,107 @@ +{ + "_meta": { + "hash": { + "sha256": "18ee0ccb31a00e80d6d010330d50a07dae82bf22c4304de7bc8f18b36a45c7cb" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.9" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "pyaes": { + "hashes": [ + "sha256:02c1b1405c38d3c370b085fb952dd8bea3fadcee6411ad99f312cc129c536d8f" + ], + "version": "==1.6.1" + }, + "pyasn1": { + "hashes": [ + "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", + "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", + "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf", + "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7", + "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", + "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00", + "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8", + "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86", + "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12", + 
"sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776", + "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba", + "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2", + "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3" + ], + "version": "==0.4.8" + }, + "pyyaml": { + "hashes": [ + "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + 
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + ], + "index": "pypi", + "version": "==6.0" + }, + "rsa": { + "hashes": [ + "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", + "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" + ], + "markers": "python_version >= '3.6' and python_version < '4'", + "version": "==4.9" + }, + "telethon": { + "hashes": [ + "sha256:06edc1852ae0eacef6f598b96638cf1fbd30e505bd314268ff762eaf3c1d550f", + "sha256:3a6c89fb3108cbc6872a5056ad3dddd0895825f9b08a549216f35f231ac2e611" + ], + "index": "pypi", + 
"version": "==1.25.1"
+        }
+    },
+    "develop": {}
+}
diff --git a/bot.py b/bot.py
new file mode 100644
index 0000000..a81f2d4
--- /dev/null
+++ b/bot.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+"""Download stickers found in configured Telegram dialogs.
+
+Every message of each dialog listed under ``stickers.sources.dialogs`` in
+``config/bot.yaml`` is scanned.  Sticker documents are saved below
+``env/download/STICKERSET_ID/`` (``inline`` for stickers without a set),
+and the first sticker seen from a set triggers a download of the whole set.
+"""
+import yaml
+# NOTE: ``sync`` looks unused but must stay: importing it lets the client's
+# coroutine methods be called without ``await``, which this script relies on.
+from telethon import TelegramClient, events, sync
+import logging
+from telethon import functions, types, errors
+from pathlib import Path
+
+# File extension used for each known sticker MIME type.
+mimes = {
+    "image/webp": "webp",
+    "application/x-tgsticker": "tgs",
+    "video/webm": "webm",
+}
+# Working directories, created on import so that the log file and the
+# downloads always have a place to go.
+w = Path(".")
+wn = w / "env"
+wn.mkdir(exist_ok=True)
+wd = wn / "download"
+wd.mkdir(exist_ok=True)
+wdi = wd / "inline"
+wdi.mkdir(exist_ok=True)
+logging.basicConfig(filename=str(wn / "bot.log"), level=logging.DEBUG)
+log = logging.getLogger("gentoobot")
+dllog = logging.getLogger("gentoobot.download")
+
+
+def download_sticker(client, document):
+    """Save *document* below ``wd`` unless an up-to-date copy exists.
+
+    Documents that are not stickers, or that reference their set in an
+    unsupported way, are skipped.  A local file counts as up to date when
+    it exists and its size equals ``document.size``.
+    """
+    stickerdir, _ = get_sticker_setid(document)
+    if stickerdir is None:
+        return
+    dldir = wd / stickerdir
+    dldir.mkdir(exist_ok=True)
+    # Unknown MIME types map to an empty extension (name ends in ".").
+    dlpath = dldir / (str(document.id) + "." + mimes.get(document.mime_type, ""))
+    if dlpath.exists() and dlpath.stat().st_size == document.size:
+        dllog.info("document %s already downloaded at %s", document.id, str(dlpath))
+        return
+    dllog.info("downloading %s to %s", document.id, str(dlpath))
+    client.download_file(document, dlpath)
+
+
+def get_sticker_setid(document):
+    """Classify *document* by the sticker set it belongs to.
+
+    Returns a ``(directory name, stickerset)`` pair: the set id as a string
+    for regular stickers, ``"inline"`` for stickers with an empty set, and
+    ``(None, None)`` when the document is not a sticker or references its
+    set in a form this script does not handle.
+    """
+    stickerset = None
+    for a in document.attributes:
+        if hasattr(a, "stickerset"):
+            stickerset = a.stickerset
+    if stickerset is None:
+        dllog.info("document %s is not a sticker", document.id)
+        return None, None
+    if isinstance(stickerset, types.InputStickerSetID):
+        dllog.info("document %s is a normal sticker", document.id)
+        return str(stickerset.id), stickerset
+    if isinstance(stickerset, types.InputStickerSetEmpty):
+        dllog.info("document %s is an inline sticker", document.id)
+        return "inline", stickerset
+    # Callers unpack the result, so always return a pair; falling off the
+    # end would hand them a bare ``None`` and raise ``TypeError``.
+    dllog.warning(
+        "document %s has unsupported stickerset reference %s",
+        document.id, type(stickerset).__name__,
+    )
+    return None, None
+
+
+def fetch_dialogs(client):
+    """Yield the dialogs whose entity id is listed in the configuration."""
+    wanted = cfg["stickers"]["sources"]["dialogs"]
+    for dialog in client.iter_dialogs():
+        log.debug(dialog)
+        if dialog.entity.id in wanted:
+            log.info(dialog.stringify())
+            yield dialog
+
+
+def _download_stickerset(client, setid):
+    """Download every document of the sticker set referenced by *setid*."""
+    request = functions.messages.GetStickerSetRequest(stickerset=setid, hash=0)
+    try:
+        documents = client(request).documents
+    except errors.rpcerrorlist.StickersetInvalidError:
+        log.warning("sadly, stickerset %s no longer exists", setid.id)
+        return
+    for doc in documents:
+        try:
+            download_sticker(client, doc)
+        except Exception as e:
+            # Best effort: one broken sticker must not abort the whole set.
+            log.critical("failed to download document %s", doc.id, exc_info=e)
+
+
+def main(client):
+    """Scan all configured dialogs and download the stickers they contain."""
+    stickerset_seen = set()
+    for sticker_archive in fetch_dialogs(client):
+        for msg in client.iter_messages(sticker_archive, limit=None):
+            log.debug(msg)
+            media = getattr(msg, "media", None)
+            if media is None:
+                log.debug("message %i has no media", msg.id)
+                continue
+            document = getattr(media, "document", None)
+            if document is None:
+                log.debug("message %i has no documents", msg.id)
+                continue
+            try:
+                dldir, setid = get_sticker_setid(document)
+                if dldir is None:
+                    continue
+                download_sticker(client, document)
+                # Fetch the rest of a regular set the first time it is seen.
+                if dldir != "inline" and setid.id not in stickerset_seen:
+                    log.debug("preparing to download whole stickerset %s as %s", setid, dldir)
+                    stickerset_seen.add(setid.id)
+                    _download_stickerset(client, setid)
+            except Exception as e:
+                log.error("somethin wrong happened during checking message: %s", msg.stringify(), exc_info=e)
+
+
+####################
+
+
+if __name__ == "__main__":
+    log.debug("opening %s", repr("config/bot.yaml"))
+    with open("config/bot.yaml", encoding="utf-8") as cfgstream:
+        cfg = yaml.safe_load(cfgstream)
+    # Log only the section names: the file contains the API credentials.
+    log.debug("config sections: %s", list(cfg))
+    app = cfg["apps"][0]
+    # The API hash is a secret, so it is kept out of the log file.
+    log.debug("starting client with id %s", app['id'])
+    client = TelegramClient('env/gentoo_session', app['id'], app['hash'])
+    client.start()
+    try:
+        log.debug(client.get_me().stringify())
+        main(client)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        # Disconnect even when main() fails with an unexpected error.
+        client.disconnect()