Fixes #1432

Refactors the invite + registration system to be simpler and more consistent with regard to existing-user invites.

Previously, per-user invites were stored in the user.invites dict instead of in the invites collection, which created a few issues:
- Existing users do not show up in the Org Invites list: #1432
- Existing-user invites also do not expire, unlike new-user invites, creating a potential security issue.

Instead, existing-user invites should be treated like new-user invites. This PR moves them into the same collection, adding a `userid` field to InvitePending to match an existing user. If a user already exists, the invite will be matched by userid instead of by email. This allows a user to update their email while still being invited.

Note that the email of the invited existing user will not change in the invite email. This is also by design: an admin of one org should not be given any hint that an invited user already has an account, such as by having their email automatically update. For an org admin, an invite to a new user and an invite to an existing user should be indistinguishable.

The sha256 of the invite token is stored instead of the actual token for better security.

The registration system has also been refactored with the following changes:
- Auto-creation of new orgs for new users has been removed
- User.create_user() replaces the old User._create() and just creates the user, without the additional complex logic around org auto-add
- Users are added to an org in org add_user_to_org()
- Users are added to an org through invites with add_user_with_invite()

Tests:
- Additional tests include verifying that existing and new pending invites appear in the pending invites list
- Tests for the `/users/invite/<token>?email=` and `/users/me/invite/<token>` endpoints
- Deleting pending invites
- Additional tests added for user self-registration, including existing-user self-registration to the default org of the existing user (in nightly tests)
323 lines
8.9 KiB
Python
323 lines
8.9 KiB
Python
import pytest
|
|
import requests
|
|
import time
|
|
import datetime
|
|
|
|
|
|
# Address of the deployment under test; every API route hangs off API_PREFIX.
HOST_PREFIX = "http://127.0.0.1:30870"
API_PREFIX = f"{HOST_PREFIX}/api"

# Credentials the admin_auth_headers fixture logs in with.
ADMIN_USERNAME = "admin@example.com"
ADMIN_PW = "PASSW0RD!"

# Credentials of the user that crawler_auth_headers adds and logs in as.
CRAWLER_USERNAME = "crawlernightly@example.com"
CRAWLER_PW = "crawlerPASSWORD!"
|
|
|
|
|
|
@pytest.fixture(scope="session")
def admin_auth_headers():
    """Log in as the admin user and return bearer-token request headers.

    Posts the admin credentials to ``/auth/jwt/login`` and retries every
    five seconds until the JSON response carries an ``access_token``.

    Returns:
        dict: ``{"Authorization": "Bearer <access_token>"}``.
    """
    while True:
        r = requests.post(
            f"{API_PREFIX}/auth/jwt/login",
            data={
                "username": ADMIN_USERNAME,
                "password": ADMIN_PW,
                "grant_type": "password",
            },
        )
        data = r.json()
        try:
            return {"Authorization": f"Bearer {data['access_token']}"}
        # Only "no token in the payload" is retried: KeyError for a JSON
        # object without the key, TypeError for any other JSON value. A bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit.
        except (KeyError, TypeError):
            print("Waiting for admin_auth_headers")
            time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def default_org_id(admin_auth_headers):
    """Return the id of the org flagged as default in the ``/orgs`` listing.

    Fetches ``/orgs`` with the admin headers and returns the ``id`` of the
    first item whose ``default`` field is ``True``. Retries every five
    seconds until such an org is found.

    Args:
        admin_auth_headers: Authorization headers used for the request.

    Returns:
        The ``id`` value of the default org.
    """
    while True:
        r = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
        data = r.json()
        try:
            # Lazy scan: stops at the first default org, so later items are
            # never inspected once a match is found.
            org_id = next(
                (org["id"] for org in data["items"] if org["default"] is True),
                None,
            )
        # Payload not in the expected shape (missing key / wrong JSON type).
        # Narrow on purpose so Ctrl-C is not swallowed by the retry loop.
        except (KeyError, TypeError):
            org_id = None

        if org_id is not None:
            return org_id

        # Sleep on every unsuccessful pass, including a well-formed listing
        # with no default org, so the API is not polled in a tight loop.
        print("Waiting for default org id")
        time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def crawler_auth_headers(admin_auth_headers, default_org_id):
    """Add the crawler user to the default org and return its auth headers.

    The add-user response is not inspected; the fixture goes straight on to
    log in with the crawler credentials. If the login response has no
    ``access_token``, the header value is built from ``None``.

    Args:
        admin_auth_headers: Authorization headers used for the add-user call.
        default_org_id: Id of the org the user is added to.

    Returns:
        dict: ``{"Authorization": "Bearer <access_token>"}``.
    """
    new_user = {
        "email": CRAWLER_USERNAME,
        "password": CRAWLER_PW,
        "name": "new-crawler",
        "role": 20,
    }
    requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/add-user",
        json=new_user,
        headers=admin_auth_headers,
    )

    credentials = {
        "username": CRAWLER_USERNAME,
        "password": CRAWLER_PW,
        "grant_type": "password",
    }
    login = requests.post(f"{API_PREFIX}/auth/jwt/login", data=credentials)
    token = login.json().get("access_token")
    return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
@pytest.fixture(scope="session")
def crawl_id_wr(admin_auth_headers, default_org_id):
    """Run a one-page crawl of webrecorder.net and return its crawl id.

    Creates a run-now crawl config in the default org, then polls the crawl's
    ``replay.json`` every five seconds until its state is ``"complete"``.

    Args:
        admin_auth_headers: Authorization headers for all requests.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        The ``run_now_job`` value from the config-creation response.
    """
    config_payload = {
        "runNow": True,
        "name": "Webrecorder admin test crawl",
        "tags": ["wr", "nightly testing"],
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "limit": 1,
        },
    }
    created = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=config_payload,
    ).json()
    crawl_id = created["run_now_job"]

    # Block until the crawl reports completion, then hand back its id.
    replay_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json"
    while True:
        status = requests.get(replay_url, headers=admin_auth_headers).json()
        if status["state"] == "complete":
            return crawl_id
        time.sleep(5)
|
|
time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def crawl_id_wr_specs(admin_auth_headers, default_org_id):
    """Run a one-page crawl of specs.webrecorder.net and return its crawl id.

    Creates a run-now crawl config in the default org, then polls the crawl's
    ``replay.json`` every five seconds until its state is ``"complete"``.

    Args:
        admin_auth_headers: Authorization headers for all requests.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        The ``run_now_job`` value from the config-creation response.
    """
    seed = {"url": "https://specs.webrecorder.net/"}
    config_payload = {
        "runNow": True,
        "name": "Webrecorder Specs admin test crawl",
        "tags": ["wr-specs", "nightly testing"],
        "config": {"seeds": [seed], "limit": 1},
    }
    response = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=config_payload,
    )
    crawl_id = response.json()["run_now_job"]

    # Keep polling (5 s apart) for as long as the crawl is not complete.
    replay_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json"
    while (
        requests.get(replay_url, headers=admin_auth_headers).json()["state"]
        != "complete"
    ):
        time.sleep(5)
    return crawl_id
|
|
time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def crawl_config_info(admin_auth_headers, default_org_id):
    """Create a crawl config, run it twice, and return the resulting ids.

    The first crawl is started by the run-now flag on config creation. Once
    it is complete, a second crawl is started through the config's ``/run``
    endpoint and awaited the same way.

    Args:
        admin_auth_headers: Authorization headers for all requests.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        tuple: ``(crawl_config_id, crawl_id, second_crawl_id)``.
    """

    def _wait_until_complete(target_crawl_id):
        """Poll the crawl's replay.json every 5 s until state is "complete"."""
        while True:
            status = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/crawls/{target_crawl_id}/replay.json",
                headers=admin_auth_headers,
            ).json()
            if status["state"] == "complete":
                return
            time.sleep(5)

    # First crawl: started as part of creating the config.
    created = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json={
            "runNow": True,
            "name": "Crawl config test",
            "config": {
                "seeds": [{"url": "https://specs.webrecorder.net/"}],
                "limit": 1,
            },
        },
    ).json()
    crawl_config_id = created["id"]
    crawl_id = created["run_now_job"]
    _wait_until_complete(crawl_id)

    # Second crawl: started from the existing config.
    rerun = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{crawl_config_id}/run",
        headers=admin_auth_headers,
    ).json()
    second_crawl_id = rerun["started"]
    _wait_until_complete(second_crawl_id)

    return (crawl_config_id, crawl_id, second_crawl_id)
|
|
time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def large_crawl_id(admin_auth_headers, default_org_id):
    """Start a larger domain-scoped crawl and return its id once running.

    Creates a run-now crawl config (page limit 100, one extra hop), polls
    ``replay.json`` every five seconds until the state is ``"running"``,
    waits a further 30 seconds, and returns without waiting for completion.

    Args:
        admin_auth_headers: Authorization headers for all requests.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        The ``run_now_job`` value from the config-creation response.
    """
    crawl_settings = {
        "seeds": [{"url": "https://webrecorder.net/"}],
        "scopeType": "domain",
        "limit": 100,
        "extraHops": 1,
    }
    created = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json={
            "runNow": True,
            "name": "Large Test Crawl",
            "tags": ["wacz-logs"],
            "config": crawl_settings,
        },
    ).json()
    crawl_id = created["run_now_job"]

    replay_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json"
    while (
        requests.get(replay_url, headers=admin_auth_headers).json()["state"]
        != "running"
    ):
        time.sleep(5)

    # Give crawl time to start properly
    time.sleep(30)
    return crawl_id
|
|
time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def large_crawl_finished(admin_auth_headers, default_org_id, large_crawl_id):
    """Block until the large crawl has completed.

    Polls the crawl's ``replay.json`` every five seconds until the state is
    ``"complete"``, then waits another 30 seconds before returning.

    Args:
        admin_auth_headers: Authorization headers for the polling requests.
        default_org_id: Id of the org that owns the crawl.
        large_crawl_id: Id of the crawl to wait for.

    Returns:
        None. The fixture exists only for its waiting side effect.
    """
    replay_url = (
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{large_crawl_id}/replay.json"
    )
    while (
        requests.get(replay_url, headers=admin_auth_headers).json()["state"]
        != "complete"
    ):
        time.sleep(5)

    # Give some time for WACZ files to be stored
    time.sleep(30)
|
|
time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def timeout_crawl(admin_auth_headers, default_org_id):
    """Start a domain-scoped crawl with ``crawlTimeout`` set to 15.

    The crawl is only started; the fixture does not wait for it to finish.

    Args:
        admin_auth_headers: Authorization headers for the request.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        The ``run_now_job`` value from the config-creation response.
    """
    crawl_settings = {
        "seeds": [{"url": "https://webrecorder.net/"}],
        "scopeType": "domain",
        "limit": 100,
    }
    response = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json={
            "runNow": True,
            "name": "Crawl with crawl timeout",
            "crawlTimeout": 15,
            "config": crawl_settings,
        },
    )
    return response.json()["run_now_job"]
|
|
|
|
|
|
@pytest.fixture(scope="session")
def max_crawl_size_crawl_id(admin_auth_headers, default_org_id):
    """Start a domain-scoped crawl capped by ``maxCrawlSize``.

    The crawl is only started; the fixture does not wait for it to finish.

    Args:
        admin_auth_headers: Authorization headers for the request.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        The ``run_now_job`` value from the config-creation response.
    """
    # Note crawl will exceed this size, as crawl begins to gracefully
    # shut down when operator notices this value has been exceeded.
    size_limit = 5 * 1024 * 1024  # 5242880

    crawl_settings = {
        "seeds": [{"url": "https://webrecorder.net/"}],
        "scopeType": "domain",
        "limit": 100,
    }
    response = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json={
            "runNow": True,
            "name": "Crawl with 5 MB max crawl size limit",
            "maxCrawlSize": size_limit,
            "config": crawl_settings,
        },
    )
    return response.json()["run_now_job"]
|
|
|
|
|
|
@pytest.fixture(scope="session")
def error_crawl_id(admin_auth_headers, default_org_id):
    """Run a crawl seeded with ``https://invalid-x.webrecorder.net/``.

    Creates a run-now crawl config named "Invalid URL crawl", then polls the
    crawl's ``replay.json`` every five seconds until its state is
    ``"complete"``.

    Args:
        admin_auth_headers: Authorization headers for all requests.
        default_org_id: Id of the org the crawl config is created in.

    Returns:
        The ``run_now_job`` value from the config-creation response.
    """
    seeds = [{"url": "https://invalid-x.webrecorder.net/"}]
    created = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json={
            "runNow": True,
            "name": "Invalid URL crawl",
            "config": {"seeds": seeds, "limit": 1},
        },
    ).json()
    crawl_id = created["run_now_job"]

    replay_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json"
    while True:
        state = requests.get(replay_url, headers=admin_auth_headers).json()["state"]
        if state == "complete":
            return crawl_id
        time.sleep(5)
|
|
time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def org_with_quotas(admin_auth_headers):
    """Create a fresh org with a timestamped name and return its id.

    The name is ``"Quota Org "`` followed by the current UTC time in ISO
    format, so each test session posts a different name.

    Args:
        admin_auth_headers: Authorization headers for the create request.

    Returns:
        The ``id`` value from the ``/orgs/create`` response.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop the tzinfo so the ISO string keeps the same format
    # as before (no "+00:00" suffix in the org name).
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    name = "Quota Org " + now_utc.isoformat()
    r = requests.post(
        f"{API_PREFIX}/orgs/create", headers=admin_auth_headers, json={"name": name}
    )
    data = r.json()

    return data["id"]
|