Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
from cycode.cli.utils.get_api_client import get_report_cycode_client
from cycode.cli.utils.progress_bar import SbomReportProgressBarSection
from cycode.cli.utils.sentry import add_breadcrumb
from cycode.cli.utils.url_utils import sanitize_repository_url
from cycode.logger import get_logger

logger = get_logger('Repository URL Command')


def repository_url_command(
Expand All @@ -28,8 +32,13 @@ def repository_url_command(
start_scan_time = time.time()
report_execution_id = -1

# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
sanitized_uri = sanitize_repository_url(uri)
if sanitized_uri != uri:
logger.debug('Sanitized repository URL to remove credentials')

try:
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri)
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=sanitized_uri)
report_execution_id = report_execution.id

create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format)
Expand Down
11 changes: 9 additions & 2 deletions cycode/cli/apps/scan/remote_url_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from cycode.cli import consts
from cycode.cli.utils.git_proxy import git_proxy
from cycode.cli.utils.shell_executor import shell
from cycode.cli.utils.url_utils import sanitize_repository_url
from cycode.logger import get_logger

logger = get_logger('Remote URL Resolver')
Expand Down Expand Up @@ -102,7 +103,11 @@ def _try_get_git_remote_url(path: str) -> Optional[str]:
repo = git_proxy.get_repo(path, search_parent_directories=True)
remote_url = repo.remotes[0].config_reader.get('url')
logger.debug('Found Git remote URL, %s', {'remote_url': remote_url, 'repo_path': repo.working_dir})
return remote_url
# Sanitize URL to remove any embedded credentials/tokens before returning
sanitized_url = sanitize_repository_url(remote_url)
if sanitized_url != remote_url:
logger.debug('Sanitized repository URL to remove credentials')
return sanitized_url
except Exception as e:
logger.debug('Failed to get Git remote URL. Probably not a Git repository', exc_info=e)
return None
Expand All @@ -124,7 +129,9 @@ def get_remote_url_scan_parameter(paths: tuple[str, ...]) -> Optional[str]:
# - len(paths)*2 Plastic SCM subprocess calls
remote_url = _try_get_any_remote_url(path)
if remote_url:
remote_urls.add(remote_url)
# URLs are already sanitized in _try_get_git_remote_url, but sanitize again as a safety measure
sanitized_url = sanitize_repository_url(remote_url)
remote_urls.add(sanitized_url)

if len(remote_urls) == 1:
# we are resolving remote_url only if all paths belong to the same repo (identical remote URLs),
Expand Down
64 changes: 64 additions & 0 deletions cycode/cli/utils/url_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from typing import Optional
from urllib.parse import urlparse, urlunparse

from cycode.logger import get_logger

logger = get_logger('URL Utils')


def sanitize_repository_url(url: Optional[str]) -> Optional[str]:
    """Remove embedded credentials (username, password, tokens) from a repository URL.

    Only the ``userinfo@`` prefix of the authority component is dropped; the host
    (including its original casing and IPv6 brackets), the port, path, query and
    fragment are kept exactly as given. URLs that carry no userinfo are returned
    unchanged.

    SSH remotes (``ssh://...`` and scp-style ``git@host:path``) are returned as-is,
    because the user name there identifies the SSH account and is not a secret.

    Args:
        url: Repository URL that may contain credentials
            (e.g., https://token@github.com/user/repo.git).

    Returns:
        The URL without credentials (e.g., https://github.com/user/repo.git).
        ``None`` or an empty string is returned unchanged.

    Examples:
        >>> sanitize_repository_url('https://token@github.com/user/repo.git')
        'https://github.com/user/repo.git'
        >>> sanitize_repository_url('https://user:token@github.com/user/repo.git')
        'https://github.com/user/repo.git'
        >>> sanitize_repository_url('git@github.com:user/repo.git')
        'git@github.com:user/repo.git'
        >>> sanitize_repository_url(None) is None
        True
    """
    if not url:
        return url

    # ssh://git@host/path - the user name is required to connect, keep it
    if url.startswith('ssh://'):
        return url
    # git@host:path format (scp-style)
    if '://' not in url and url.startswith('git@'):
        return url

    try:
        parsed = urlparse(url)
    except ValueError as e:
        # Never log the URL itself (or the exception text, which may quote the netloc):
        # it can contain the very credentials we are trying to remove.
        logger.debug(
            'Failed to parse repository URL, stripping credentials manually, %s',
            {'error': type(e).__name__},
        )
        return _strip_userinfo_without_parsing(url)

    if '@' not in parsed.netloc:
        # Nothing to strip; return the input untouched so the caller's
        # "was it changed?" comparison is not triggered by mere normalization.
        return url

    # Everything after the last '@' is host[:port]. Slicing the raw netloc (instead of
    # rebuilding it from .hostname/.port) preserves IPv6 brackets and host casing and
    # cannot fail on a malformed port.
    host_and_port = parsed.netloc.rpartition('@')[2]
    return urlunparse(parsed._replace(netloc=host_and_port))


def _strip_userinfo_without_parsing(url: str) -> str:
    """Best-effort removal of ``userinfo@`` from a URL that ``urlparse`` rejected."""
    scheme, separator, remainder = url.partition('://')
    if not separator:
        return url

    # The authority component ends at the first '/', '?' or '#' (or at end of string).
    authority_end = min(
        (position for position in map(remainder.find, '/?#') if position != -1),
        default=len(remainder),
    )
    authority = remainder[:authority_end]
    return f'{scheme}{separator}{authority.rpartition("@")[2]}{remainder[authority_end:]}'
10 changes: 9 additions & 1 deletion cycode/cyclient/report_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@

from cycode.cli.exceptions.custom_exceptions import CycodeError
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
from cycode.cli.utils.url_utils import sanitize_repository_url
from cycode.cyclient import models
from cycode.cyclient.cycode_client_base import CycodeClientBase
from cycode.logger import get_logger

logger = get_logger('Report Client')


@dataclasses.dataclass
Expand Down Expand Up @@ -49,7 +53,11 @@ def request_sbom_report_execution(
# entity type required only for zipped-file
request_data = {'report_parameters': params.to_json(without_entity_type=zip_file is None)}
if repository_url:
request_data['repository_url'] = repository_url
# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
sanitized_url = sanitize_repository_url(repository_url)
if sanitized_url != repository_url:
logger.debug('Sanitized repository URL to remove credentials')
request_data['repository_url'] = sanitized_url

request_args = {
'url_path': url_path,
Expand Down
80 changes: 80 additions & 0 deletions tests/utils/test_url_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from cycode.cli.utils.url_utils import sanitize_repository_url


def test_sanitize_repository_url_with_token() -> None:
    """A bare token in the userinfo part of an HTTPS URL is stripped."""
    sanitized = sanitize_repository_url('https://token@github.com/user/repo.git')
    assert sanitized == 'https://github.com/user/repo.git'


def test_sanitize_repository_url_with_username_and_token() -> None:
    """Both halves of a ``user:token`` pair are stripped from an HTTPS URL."""
    sanitized = sanitize_repository_url('https://user:token@github.com/user/repo.git')
    assert sanitized == 'https://github.com/user/repo.git'


def test_sanitize_repository_url_with_port() -> None:
    """An explicit port survives credential removal."""
    sanitized = sanitize_repository_url('https://token@github.com:443/user/repo.git')
    assert sanitized == 'https://github.com:443/user/repo.git'


def test_sanitize_repository_url_ssh_format() -> None:
    """An scp-style SSH remote (``git@host:path``) passes through untouched."""
    scp_style_remote = 'git@github.com:user/repo.git'
    sanitized = sanitize_repository_url(scp_style_remote)
    assert sanitized == scp_style_remote


def test_sanitize_repository_url_ssh_protocol() -> None:
    """An ``ssh://`` remote passes through untouched."""
    ssh_remote = 'ssh://git@github.com/user/repo.git'
    sanitized = sanitize_repository_url(ssh_remote)
    assert sanitized == ssh_remote


def test_sanitize_repository_url_no_credentials() -> None:
    """A credential-free HTTPS URL comes back unchanged."""
    clean_url = 'https://github.com/user/repo.git'
    sanitized = sanitize_repository_url(clean_url)
    assert sanitized == clean_url


def test_sanitize_repository_url_none() -> None:
    """``None`` in, ``None`` out."""
    result = sanitize_repository_url(None)
    assert result is None


def test_sanitize_repository_url_empty_string() -> None:
    """An empty string comes back as an empty string."""
    result = sanitize_repository_url('')
    assert result == ''


def test_sanitize_repository_url_gitlab() -> None:
    """GitLab-style ``oauth2:<token>`` credentials are stripped."""
    sanitized = sanitize_repository_url('https://oauth2:token@gitlab.com/user/repo.git')
    assert sanitized == 'https://gitlab.com/user/repo.git'


def test_sanitize_repository_url_bitbucket() -> None:
    """Bitbucket-style ``x-token-auth:<token>`` credentials are stripped."""
    sanitized = sanitize_repository_url('https://x-token-auth:token@bitbucket.org/user/repo.git')
    assert sanitized == 'https://bitbucket.org/user/repo.git'


def test_sanitize_repository_url_with_path_and_query() -> None:
    """Path, query string and fragment are kept when credentials are removed."""
    sanitized = sanitize_repository_url('https://token@github.com/user/repo.git?ref=main#section')
    assert sanitized == 'https://github.com/user/repo.git?ref=main#section'


def test_sanitize_repository_url_invalid_url() -> None:
    """A string that is not a real URL is handed back unchanged, without raising."""
    not_a_url = 'not-a-valid-url'
    # No scheme or host to strip credentials from, so the input must come back verbatim.
    assert sanitize_repository_url(not_a_url) == not_a_url