Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions bitnet_tools/geo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from __future__ import annotations

import math
from typing import Any


MISSING_OR_NON_NUMERIC = 'missing_or_non_numeric'
OUT_OF_RANGE = 'out_of_range'
DISTANCE_THRESHOLD_EXCEEDED = 'distance_threshold_exceeded'


def _coerce_float(value: Any) -> float | None:
    """Convert *value* to a finite float, or return ``None`` when that fails.

    ``None`` is returned for ``None``, for empty or whitespace-only strings,
    for anything ``float()`` rejects, for integers too large to fit in a
    float, and for non-finite results (NaN, +inf, -inf).
    """
    if value is None:
        return None
    if isinstance(value, str) and not value.strip():
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() raises it for integers beyond the float range.
        return None
    # Strings such as "nan" or "inf" parse successfully but are not real numbers
    # for coordinate purposes, so treat them as non-numeric.
    return number if math.isfinite(number) else None


def validate_lat_lon(lat: Any, lon: Any) -> bool:
    """Tell whether *lat*/*lon* form a usable coordinate pair.

    Both values are passed through ``_coerce_float``; the pair is valid only
    when both coerce successfully, the latitude lies in [-90, 90] and the
    longitude lies in [-180, 180].
    """
    coords = (_coerce_float(lat), _coerce_float(lon))
    if any(part is None for part in coords):
        return False
    latitude, longitude = coords
    # abs() folds each two-sided range check into a single comparison.
    return abs(latitude) <= 90.0 and abs(longitude) <= 180.0


def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the haversine great-circle distance between two points, in km.

    All four arguments are angles in degrees. The sphere radius used is
    6371.0 km.
    """
    radius_km = 6371.0
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push ``a`` marginally outside [0, 1], which
    # would make math.sqrt raise ValueError. Clamp it; NaN fails both
    # comparisons and therefore still propagates unchanged.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius_km * c


def flag_geo_suspects(
    rows: list[dict[str, Any]],
    lat_col: str,
    lon_col: str,
    threshold_km: float = 25,
) -> list[dict[str, Any]]:
    """Return a copy of each row annotated with geo-suspect information.

    Every output row gains three keys:

    * ``is_suspect`` - ``True`` when at least one reason was recorded.
    * ``suspect_reason`` - the reasons joined with ``'|'`` (empty if none).
    * ``distance_km`` - distance from the previous valid coordinate, rounded
      to three decimals, or ``None`` when no distance was computed.

    A row is flagged when its coordinates are missing/non-numeric, out of
    range, or at least ``threshold_km`` away from the previous valid
    coordinate. Input rows are not modified.
    """
    annotated: list[dict[str, Any]] = []
    last_good: tuple[float, float] | None = None

    for record in rows:
        enriched = dict(record)
        lat = _coerce_float(record.get(lat_col))
        lon = _coerce_float(record.get(lon_col))
        hop_km: float | None = None
        why: list[str] = []

        if lat is not None and lon is not None:
            if validate_lat_lon(lat, lon):
                if last_good is not None:
                    hop_km = haversine_km(*last_good, lat, lon)
                    if hop_km >= float(threshold_km):
                        why.append(DISTANCE_THRESHOLD_EXCEEDED)
                # Any in-range point becomes the next reference, flagged or not.
                last_good = (lat, lon)
            else:
                why.append(OUT_OF_RANGE)
        else:
            why.append(MISSING_OR_NON_NUMERIC)

        enriched.update({
            'is_suspect': bool(why),
            'suspect_reason': '|'.join(why),
            'distance_km': None if hop_km is None else round(hop_km, 3),
        })
        annotated.append(enriched)

    return annotated
86 changes: 86 additions & 0 deletions bitnet_tools/ui/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ const UI = {
filterType: document.getElementById('filterType'),
insightList: document.getElementById('insightList'),
insightDrilldown: document.getElementById('insightDrilldown'),
geoLatCol: document.getElementById('geoLatCol'),
geoLonCol: document.getElementById('geoLonCol'),
geoThreshold: document.getElementById('geoThreshold'),
geoExtractBtn: document.getElementById('geoExtractBtn'),
geoResult: document.getElementById('geoResult'),
};

const STATUS = {
Expand Down Expand Up @@ -360,6 +365,7 @@ function toggleBusy(isBusy) {
UI.retryChartsJobBtn,
UI.switchToCsvBtn,
UI.candidateTableSelect,
UI.geoExtractBtn,
...document.querySelectorAll('.mode-btn'),
...document.querySelectorAll('.chip'),
];
Expand Down Expand Up @@ -955,6 +961,85 @@ async function runByIntent() {
setStatus('의도 라우팅 실패');
}



/**
 * Write a plain-text summary of a geo-suspect response into UI.geoResult.
 * Shows the total/suspect/normal counts, the threshold that was used, and one
 * "key: value" line per entry in data.artifacts. Does nothing when the
 * target element is missing.
 */
function renderGeoResult(data) {
  const target = UI.geoResult;
  if (!target) return;

  const artifactLines = [];
  for (const [key, value] of Object.entries(data?.artifacts || {})) {
    artifactLines.push(`${key}: ${value}`);
  }

  const summaryLine = `총 ${data?.count ?? 0}건 / 의심 ${data?.suspect_count ?? 0}건 / 정상 ${data?.normal_count ?? 0}건`;
  const thresholdLine = `threshold_km=${data?.threshold_km ?? 25}`;

  // filter(Boolean) drops the artifact section when there are no artifacts.
  const lines = [summaryLine, thresholdLine, artifactLines.join('\n')].filter(Boolean);
  target.textContent = lines.join('\n');
}

/**
 * Click handler for the geo-suspect extraction button.
 *
 * Reads the latitude/longitude column names and distance threshold from the
 * form, builds a request payload from the selected file (or from the inline
 * CSV text when no file is chosen), POSTs it to /api/geo/suspects and renders
 * the summary. The threshold falls back to 25 when the field is blank or not
 * a finite number.
 */
async function runGeoSuspectExtract() {
  const latCol = String(UI.geoLatCol?.value || '').trim();
  const lonCol = String(UI.geoLonCol?.value || '').trim();

  if (!latCol || !lonCol) {
    showError('위도/경도 컬럼명을 입력하세요.', 'geoLatCol/geoLonCol is empty');
    return;
  }

  const file = UI.csvFile?.files?.[0] || null;
  const threshold = Number(UI.geoThreshold?.value || 25);

  clearError();
  try {
    toggleBusy(true);
    setStatus('Geo 의심 케이스 추출 중...');

    // The payload is built inside the try block so that a failed file read
    // is reported through showError instead of becoming an unhandled
    // promise rejection, and so the controls stay disabled during the read.
    let payload;
    if (file) {
      const inputType = getInputTypeForFile(file);
      if (inputType === 'excel') {
        payload = {
          input_type: 'excel',
          source_name: file.name,
          file_base64: await readFileAsBase64(file),
          sheet_name: UI.sheetSelect?.value || '',
        };
      } else if (inputType === 'document') {
        payload = {
          input_type: 'document',
          source_name: file.name,
          file_base64: await readFileAsBase64(file),
          table_index: Number(UI.sheetSelect?.value || 0),
        };
      } else {
        payload = {
          input_type: 'csv',
          source_name: file.name,
          normalized_csv_text: await file.text(),
        };
      }
    } else {
      payload = {
        input_type: 'csv',
        source_name: '<inline_csv>',
        normalized_csv_text: UI.csvText?.value || '',
      };
    }

    const data = await postJson('/api/geo/suspects', {
      ...payload,
      lat_col: latCol,
      lon_col: lonCol,
      threshold_km: Number.isFinite(threshold) ? threshold : 25,
      inline: false,
      include_geojson: false,
    }, 'Geo 의심 케이스 추출');
    renderGeoResult(data);
    setStatus('Geo 의심 케이스 추출 완료');
  } catch (err) {
    // Errors raised outside postJson (e.g. file reads) carry no
    // userMessage/detail, so fall back to the generic text and err.message.
    showError(err?.userMessage || 'Geo 의심 케이스 추출 실패', err?.detail || err?.message || '');
    setStatus('Geo 의심 케이스 추출 실패');
  } finally {
    toggleBusy(false);
  }
}

function bindEvents() {
document.querySelectorAll('.mode-btn').forEach((btn) => {
btn.addEventListener('click', () => setMode(btn.dataset.mode));
Expand Down Expand Up @@ -1019,6 +1104,7 @@ function bindEvents() {
UI.runBtn?.addEventListener('click', runModel);
UI.multiAnalyzeBtn?.addEventListener('click', runMultiAnalyze);
UI.startChartsJobBtn?.addEventListener('click', startChartsJob);
UI.geoExtractBtn?.addEventListener('click', runGeoSuspectExtract);
UI.retryChartsJobBtn?.addEventListener('click', retryChartsJob);
UI.retryPreprocessBtn?.addEventListener('click', async () => {
if (!appState.preprocessJob.payload) {
Expand Down
24 changes: 24 additions & 0 deletions bitnet_tools/ui/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,30 @@ <h2>3) 실행 상태</h2>
</details>
</section>


<section class="panel">
  <!-- Geo suspect extraction form: column names and distance threshold in,
       plain-text result summary out (rendered into #geoResult). -->
  <h2>Geo 의심 케이스 추출</h2>
  <p class="sub">지도 렌더링 없이 의심 케이스 파일(CSV/JSON)을 생성합니다.</p>
  <div class="row">
    <div>
      <!-- "for" ties each label to its input for click-to-focus and screen readers. -->
      <label for="geoLatCol">위도 컬럼명</label>
      <input id="geoLatCol" placeholder="예: lat" />
    </div>
    <div>
      <label for="geoLonCol">경도 컬럼명</label>
      <input id="geoLonCol" placeholder="예: lon" />
    </div>
    <div>
      <label for="geoThreshold">거리 임계값(km)</label>
      <input id="geoThreshold" type="number" min="0" step="0.1" value="25" />
    </div>
  </div>
  <div class="actions">
    <button id="geoExtractBtn" type="button">의심 케이스 추출 다운로드</button>
  </div>
  <pre id="geoResult" aria-live="polite">Geo 추출 대기 중</pre>
</section>

<section class="panel">
<h2>4) 결과</h2>
<div id="analyzeAssist" class="analyze-assist" aria-live="polite" hidden>
Expand Down
110 changes: 110 additions & 0 deletions bitnet_tools/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from .analysis import build_analysis_payload_from_request
from .document_extract import extract_document_tables_from_base64, table_to_analysis_request
from .geo import flag_geo_suspects, validate_lat_lon
from .multi_csv import analyze_multiple_csv
from .planner import build_plan, execute_plan_from_csv_text, parse_question_to_intent
from .visualize import create_multi_charts
Expand Down Expand Up @@ -215,6 +216,65 @@ def _classify_preprocess_error(exc: Exception) -> str:
return 'parser_error'


def _rows_from_csv_text(csv_text: str) -> tuple[list[str], list[dict[str, Any]]]:
    """Parse CSV text into ``(fieldnames, rows)``.

    The first record is the header. Each following record becomes a dict
    keyed by header name; cells missing from a short record are ``None``.

    Raises:
        ValueError: if the text has no header row.
    """
    # A leading byte-order mark would otherwise become part of the first
    # header name and break column lookups by name.
    if csv_text.startswith('\ufeff'):
        csv_text = csv_text[1:]

    reader = csv.DictReader(io.StringIO(csv_text))
    fieldnames = [str(name) for name in (reader.fieldnames or []) if name is not None]
    if not fieldnames:
        raise ValueError('csv header is required')

    rows: list[dict[str, Any]] = []
    for raw in reader:
        # DictReader collects cells beyond the header under the key None;
        # that key has no column name and makes csv.DictWriter raise later,
        # so drop it to keep every row keyed by str only.
        rows.append({key: value for key, value in raw.items() if key is not None})
    return fieldnames, rows


def _build_geojson_feature_collection(rows: list[dict[str, Any]], lat_col: str, lon_col: str) -> dict[str, Any]:
    """Build a GeoJSON-style ``FeatureCollection`` dict of Point features.

    Rows whose ``lat_col``/``lon_col`` values fail ``validate_lat_lon`` are
    left out. Each remaining row becomes a ``Feature`` whose coordinates are
    ``[lon, lat]`` and whose properties are all other columns of the row.
    """
    coord_cols = {lat_col, lon_col}

    def to_feature(record: dict[str, Any]) -> dict[str, Any]:
        # Coordinates are emitted longitude first, then latitude.
        position = [float(record[lon_col]), float(record[lat_col])]
        extras = {key: val for key, val in record.items() if key not in coord_cols}
        return {
            'type': 'Feature',
            'geometry': {'type': 'Point', 'coordinates': position},
            'properties': extras,
        }

    usable = (
        record for record in rows
        if validate_lat_lon(record.get(lat_col), record.get(lon_col))
    )
    return {'type': 'FeatureCollection', 'features': [to_feature(record) for record in usable]}


def _write_geo_suspect_artifacts(
    result_rows: list[dict[str, Any]],
    fieldnames: list[str],
    lat_col: str,
    lon_col: str,
    include_geojson: bool,
) -> dict[str, str]:
    """Write the flagged rows to disk and return the artifact paths.

    A fresh directory ``.bitnet_cache/geo_suspects/<uuid hex>`` (relative to
    the current working directory) receives ``geo_suspects.csv`` and
    ``geo_suspects.json``, plus ``geo_suspects.geojson`` when
    ``include_geojson`` is true.

    Returns:
        A dict mapping ``'csv'``, ``'json'`` and optionally ``'geojson'`` to
        the written file paths as strings.
    """
    out_dir = Path('.bitnet_cache') / 'geo_suspects' / uuid.uuid4().hex
    out_dir.mkdir(parents=True, exist_ok=True)

    csv_path = out_dir / 'geo_suspects.csv'
    json_path = out_dir / 'geo_suspects.json'

    # Original columns first, then the annotation columns not already present.
    ordered_fields = list(fieldnames)
    for col in ('is_suspect', 'suspect_reason', 'distance_km'):
        if col not in ordered_fields:
            ordered_fields.append(col)

    with csv_path.open('w', encoding='utf-8', newline='') as f:
        # extrasaction='ignore': a row key outside the header (for example the
        # None key DictReader uses for overflow cells) must not abort the
        # export with ValueError.
        writer = csv.DictWriter(f, fieldnames=ordered_fields, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(result_rows)

    json_path.write_text(json.dumps(result_rows, ensure_ascii=False, indent=2), encoding='utf-8')

    artifacts = {'csv': str(csv_path), 'json': str(json_path)}
    if include_geojson:
        geojson_path = out_dir / 'geo_suspects.geojson'
        geojson = _build_geojson_feature_collection(result_rows, lat_col, lon_col)
        geojson_path.write_text(json.dumps(geojson, ensure_ascii=False, indent=2), encoding='utf-8')
        artifacts['geojson'] = str(geojson_path)
    return artifacts


def _cleanup_expired_preprocess_jobs() -> None:
now = datetime.now(timezone.utc)
threshold = now - timedelta(seconds=PREPROCESS_JOB_TTL_SECONDS)
Expand Down Expand Up @@ -562,6 +622,56 @@ def do_POST(self) -> None:
return self._send_json({'job_id': job_id, 'status': 'queued'}, HTTPStatus.ACCEPTED)



if route == '/api/geo/suspects':
lat_col = str(payload.get('lat_col', '')).strip()
lon_col = str(payload.get('lon_col', '')).strip()
threshold_km = float(payload.get('threshold_km', 25) or 25)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve zero threshold values in geo suspects API

The threshold parsing uses float(payload.get('threshold_km', 25) or 25), which treats a valid numeric 0 as falsy and silently replaces it with 25. This changes user-requested behavior: when the client sends threshold_km: 0 (the UI allows this with min="0"), distances below 25 km are not flagged even though the request asked for a zero-distance cutoff.

Useful? React with 👍 / 👎.

include_geojson = bool(payload.get('include_geojson', False))
inline = bool(payload.get('inline', True))

if not lat_col or not lon_col:
return self._send_json(self._error_payload('lat_col and lon_col are required'), HTTPStatus.BAD_REQUEST)

file_payload = {
'input_type': str(payload.get('input_type', 'csv') or 'csv'),
'name': str(payload.get('source_name', '<inline_csv>') or '<inline_csv>'),
'normalized_csv_text': str(payload.get('normalized_csv_text', '') or ''),
'csv_text': str(payload.get('csv_text', '') or ''),
'file_base64': payload.get('file_base64', ''),
'sheet_name': payload.get('sheet_name', ''),
'table_index': payload.get('table_index', 0),
}
_, normalized_csv_text, _ = _coerce_csv_text_from_file_payload(file_payload)
fieldnames, rows = _rows_from_csv_text(normalized_csv_text)

if lat_col not in fieldnames or lon_col not in fieldnames:
return self._send_json(
self._error_payload('lat_col/lon_col not found in csv header', f'header={fieldnames}'),
HTTPStatus.BAD_REQUEST,
)

result_rows = flag_geo_suspects(rows, lat_col=lat_col, lon_col=lon_col, threshold_km=threshold_km)
suspect_count = sum(1 for row in result_rows if row.get('is_suspect'))
normal_count = len(result_rows) - suspect_count
artifacts = _write_geo_suspect_artifacts(
result_rows,
fieldnames,
lat_col=lat_col,
lon_col=lon_col,
include_geojson=include_geojson,
)
response = {
'count': len(result_rows),
'suspect_count': suspect_count,
'normal_count': normal_count,
'threshold_km': threshold_km,
'artifacts': artifacts,
}
if inline:
response['rows'] = result_rows
return self._send_json(response)

if route == "/api/multi-analyze":
files = payload.get("files", [])
question = str(payload.get("question", "")).strip() or "다중 CSV를 비교 분석해줘"
Expand Down
Loading