- add tags to crawl object - ensure tags are copied from the crawlconfig to the crawl when a crawl is created (both manually and scheduled) - tests: add a test to ensure tags are added to the crawl; remove the redundant wait loop, which is now replaced by fixtures
This commit is contained in:
parent
49460bb070
commit
2daa742585
@ -47,6 +47,7 @@ class CrawlJob(ABC):
|
||||
self.userid = uuid.UUID(os.environ["USER_ID"])
|
||||
|
||||
self.is_manual = os.environ.get("RUN_MANUAL") == "1"
|
||||
# TAGS is a comma-separated list; "".split(",") returns [""], so drop
# empty entries to get [] rather than [""] when no tags are set.
self.tags = [tag for tag in os.environ.get("TAGS", "").split(",") if tag]
|
||||
|
||||
self.scale = int(os.environ.get("INITIAL_SCALE") or 0)
|
||||
|
||||
@ -360,6 +361,7 @@ class CrawlJob(ABC):
|
||||
manual=self.is_manual,
|
||||
scale=scale,
|
||||
started=self.started,
|
||||
tags=self.tags,
|
||||
# colls=json.loads(job.metadata.annotations.get("btrix.colls", [])),
|
||||
)
|
||||
|
||||
|
||||
@ -183,6 +183,7 @@ class BaseCrawlManager(ABC):
|
||||
"schedule": schedule,
|
||||
"env": os.environ,
|
||||
"mongo_db_url": resolve_db_url(),
|
||||
"tags": ",".join(crawlconfig.tags),
|
||||
}
|
||||
|
||||
return self.templates.env.get_template("crawl_job.yaml").render(params)
|
||||
|
||||
@ -84,6 +84,7 @@ class Crawl(BaseMongoModel):
|
||||
files: Optional[List[CrawlFile]] = []
|
||||
|
||||
colls: Optional[List[str]] = []
|
||||
tags: Optional[List[str]] = []
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -121,6 +122,7 @@ class ListCrawlOut(BaseMongoModel):
|
||||
fileCount: int = 0
|
||||
|
||||
colls: Optional[List[str]] = []
|
||||
tags: Optional[List[str]] = []
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -358,6 +360,7 @@ class CrawlOps:
|
||||
scale=crawlconfig.scale,
|
||||
manual=True,
|
||||
started=ts_now(),
|
||||
tags=crawlconfig.tags,
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@ -70,6 +70,9 @@ spec:
|
||||
- name: CRAWL_CONFIG_ID
|
||||
value: "{{ cid }}"
|
||||
|
||||
- name: TAGS
|
||||
value: "{{ tags }}"
|
||||
|
||||
- name: STORE_PATH
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
|
||||
@ -58,7 +58,11 @@ def admin_crawl_id(admin_auth_headers, admin_aid):
|
||||
crawl_data = {
|
||||
"runNow": True,
|
||||
"name": "Admin Test Crawl",
|
||||
"config": {"seeds": ["https://webrecorder.net/"], "limit": 1},
|
||||
"tags": ["wr-test-1", "wr-test-2"],
|
||||
"config": {
|
||||
"seeds": ["https://webrecorder.net/"],
|
||||
"limit": 1,
|
||||
},
|
||||
}
|
||||
r = requests.post(
|
||||
f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/",
|
||||
|
||||
@ -14,7 +14,7 @@ def test_ensure_only_one_default_org(admin_auth_headers):
|
||||
default_org_name = default_orgs[0]["name"]
|
||||
orgs_with_same_name = [org for org in orgs if org["name"] == default_org_name]
|
||||
assert len(orgs_with_same_name) == 1
|
||||
|
||||
|
||||
|
||||
def test_rename_org(admin_auth_headers, admin_aid):
|
||||
UPDATED_NAME = "updated org name"
|
||||
@ -35,6 +35,7 @@ def test_rename_org(admin_auth_headers, admin_aid):
|
||||
data = r.json()
|
||||
assert data["name"] == UPDATED_NAME
|
||||
|
||||
|
||||
def test_create_org(admin_auth_headers):
|
||||
NEW_ORG_NAME = "New Org"
|
||||
r = requests.post(
|
||||
|
||||
@ -46,28 +46,18 @@ def test_create_new_config(admin_auth_headers, admin_aid):
|
||||
|
||||
|
||||
def test_wait_for_complete(admin_auth_headers, admin_aid, admin_crawl_id):
|
||||
print("")
|
||||
print("---- Running Crawl ----")
|
||||
|
||||
while True:
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/archives/{admin_aid}/crawls/{admin_crawl_id}/replay.json",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
data = r.json()
|
||||
assert (
|
||||
data["state"] == "starting"
|
||||
or data["state"] == "running"
|
||||
or data["state"] == "complete"
|
||||
), data["state"]
|
||||
if data["state"] == "complete":
|
||||
break
|
||||
|
||||
time.sleep(5)
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/archives/{admin_aid}/crawls/{admin_crawl_id}/replay.json",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
data = r.json()
|
||||
assert data["state"] == "complete"
|
||||
|
||||
assert len(data["resources"]) == 1
|
||||
assert data["resources"][0]["path"]
|
||||
|
||||
assert data["tags"] == ["wr-test-1", "wr-test-2"]
|
||||
|
||||
global wacz_path
|
||||
global wacz_size
|
||||
global wacz_hash
|
||||
|
||||
Loading…
Reference in New Issue
Block a user