
News Intelligence Agent

Role

Runs batch analysis over many news articles at once on the /news page to capture the overall market flow.

Core Capabilities

1. Batch News Processing

import asyncio
from typing import Dict, List

async def analyze_batch(
    news_articles: List[NewsArticle],
    batch_size: int = 50
) -> Dict:
    """Process multiple articles in parallel"""
    
    results = []
    
    # Process in batches to avoid API rate limits
    for i in range(0, len(news_articles), batch_size):
        batch = news_articles[i:i+batch_size]
        
        # Parallel processing
        batch_results = await asyncio.gather(*[
            analyze_single_article(article)
            for article in batch
        ])
        
        results.extend(batch_results)
    
    return aggregate_batch_results(results)
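
analyze_single_article and aggregate_batch_results are assumed helpers that are not defined in this document; a minimal sketch of what they might look like, reusing the sentiment scorer from the next section (all field names are assumptions):

async def analyze_single_article(article: NewsArticle) -> Dict:
    """Hypothetical per-article pass: score sentiment on the body text"""
    return {
        "ticker": article.ticker,  # assumed field
        "sentiment_score": calculate_sentiment(article.content or ""),
    }

def aggregate_batch_results(results: List[Dict]) -> Dict:
    """Hypothetical aggregation: batch size plus average sentiment"""
    scores = [r["sentiment_score"] for r in results]
    return {
        "article_count": len(results),
        "avg_sentiment": sum(scores) / len(scores) if scores else 0.0,
    }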

2. Sentiment Analysis

Sentiment Scoring

def calculate_sentiment(text: str) -> float:
    """Calculate sentiment score -1 to +1"""
    
    # Positive keywords
    positive = ["surge", "beat", "record", "growth", "bullish", "upgrade"]
    
    # Negative keywords
    negative = ["plunge", "miss", "loss", "decline", "bearish", "downgrade"]
    
    # Count occurrences
    pos_count = sum(text.lower().count(word) for word in positive)
    neg_count = sum(text.lower().count(word) for word in negative)
    
    # Normalize
    total = pos_count + neg_count
    if total == 0:
        return 0.0
    
    sentiment = (pos_count - neg_count) / total
    
    # Clamp to [-1, 1]
    return max(-1.0, min(1.0, sentiment))
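
A quick sanity check on sample headlines (keyword counts in the comments):

calculate_sentiment("Shares surge after record earnings beat")    # 1.0  (3 positive, 0 negative)
calculate_sentiment("Stock plunges on earnings miss, downgrade")  # -1.0 (0 positive, 3 negative)
calculate_sentiment("Company schedules its annual meeting")       # 0.0  (no keyword hits)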

Sentiment Categories

SENTIMENT_LEVELS = {
    "VERY_POSITIVE": (0.6, 1.0),
    "POSITIVE": (0.3, 0.6),
    "NEUTRAL": (-0.3, 0.3),
    "NEGATIVE": (-0.6, -0.3),
    "VERY_NEGATIVE": (-1.0, -0.6)
}
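
categorize_sentiment, referenced in the Decision Framework below, is not defined in this document; a minimal sketch that maps a score onto SENTIMENT_LEVELS (tie-breaking at the overlapping boundaries is an assumption):

def categorize_sentiment(score: float) -> str:
    """Map a score in [-1, 1] to a SENTIMENT_LEVELS label"""
    for label, (low, high) in SENTIMENT_LEVELS.items():
        # Boundaries overlap (e.g. 0.3); the first match in insertion
        # order wins, so the more extreme label takes precedence
        if low <= score <= high:
            return label
    return "NEUTRAL"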

3. Keyword Extraction

from typing import List

from sklearn.feature_extraction.text import TfidfVectorizer

def extract_keywords(texts: List[str], top_n: int = 10) -> List[str]:
    """Extract important keywords using TF-IDF"""
    
    vectorizer = TfidfVectorizer(
        max_features=top_n,
        stop_words='english',
        ngram_range=(1, 2)  # Unigrams and bigrams
    )
    
    tfidf_matrix = vectorizer.fit_transform(texts)
    
    feature_names = vectorizer.get_feature_names_out()
    
    # Get top keywords
    scores = tfidf_matrix.sum(axis=0).A1
    top_indices = scores.argsort()[-top_n:][::-1]
    
    keywords = [feature_names[i] for i in top_indices]
    
    return keywords
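
Example call, with headlines standing in for full article texts (the exact output depends on the TF-IDF weights of the corpus):

headlines = [
    "AI chip demand drives record growth",
    "Fed signals rate path amid inflation data",
    "Semiconductor earnings beat estimates on AI growth",
]
print(extract_keywords(headlines, top_n=5))
# e.g. ['ai', 'growth', 'ai growth', 'chip', 'demand']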

4. Theme Detection

from typing import Dict, List

from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

def detect_themes(articles: List[str], n_themes: int = 5) -> List[Dict]:
    """Cluster articles into themes"""
    
    # Vectorize
    vectorizer = TfidfVectorizer(max_features=100, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(articles)
    
    # Cluster
    kmeans = KMeans(n_clusters=n_themes, random_state=42)
    kmeans.fit(tfidf_matrix)
    
    # Extract theme keywords
    themes = []
    feature_names = vectorizer.get_feature_names_out()
    
    for i, cluster_center in enumerate(kmeans.cluster_centers_):
        top_indices = cluster_center.argsort()[-5:][::-1]
        theme_keywords = [feature_names[idx] for idx in top_indices]
        
        # Count articles in theme
        article_count = int((kmeans.labels_ == i).sum())  # cast numpy int for JSON safety
        
        themes.append({
            "theme_id": i,
            "keywords": theme_keywords,
            "article_count": article_count,
            "theme_name": generate_theme_name(theme_keywords)
        })
    
    return sorted(themes, key=lambda x: x['article_count'], reverse=True)
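
generate_theme_name is assumed above and not defined here; one simple placeholder is to join the strongest keywords (a production version might ask an LLM to label the cluster):

def generate_theme_name(keywords: List[str]) -> str:
    """Placeholder naming: title-case the two strongest keywords"""
    return " / ".join(k.title() for k in keywords[:2])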

5. Ticker Buzz Score

import math
from datetime import datetime
from typing import Dict, List

def calculate_ticker_buzz(
    ticker: str,
    news_articles: List[NewsArticle],
    timeframe_hours: int = 24
) -> Dict:
    """Calculate how much a ticker is being discussed"""
    
    # Filter articles mentioning ticker
    ticker_articles = [
        a for a in news_articles
        if ticker in (a.ticker or '') or ticker in (a.content or '').upper()
    ]
    
    # Recency weight (more recent = higher weight)
    now = datetime.now()
    weighted_mentions = 0
    
    for article in ticker_articles:
        hours_ago = (now - article.created_at).total_seconds() / 3600
        
        if 0 <= hours_ago <= timeframe_hours:
            # Exponential decay
            weight = math.exp(-hours_ago / (timeframe_hours / 2))
            weighted_mentions += weight
    
    # Normalize to 0-100 scale
    buzz_score = min(100, weighted_mentions * 10)
    
    # Sentiment breakdown
    sentiments = [
        a.sentiment_score for a in ticker_articles
        if a.sentiment_score is not None  # keep legitimate 0.0 scores
    ]
    avg_sentiment = sum(sentiments) / len(sentiments) if sentiments else 0
    
    return {
        "ticker": ticker,
        "buzz_score": buzz_score,
        "mention_count": len(ticker_articles),
        "avg_sentiment": avg_sentiment,
        "timeframe_hours": timeframe_hours,
        "trending": "UP" if buzz_score > 50 else "NORMAL"
    }
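
With timeframe_hours=24 the decay constant is 12 hours, so a fresh mention weighs 1.0 and a 12-hour-old one weighs e^-1 ≈ 0.37; a quick check of the formula:

import math

timeframe_hours = 24
for hours_ago in (0, 6, 12, 24):
    weight = math.exp(-hours_ago / (timeframe_hours / 2))
    print(f"{hours_ago:>2}h ago -> weight {weight:.2f}")
# 0h -> 1.00, 6h -> 0.61, 12h -> 0.37, 24h -> 0.14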

Decision Framework

Step 1: Fetch News Articles
  articles = db.query(NewsArticle).filter(
    NewsArticle.created_at >= datetime.now() - timedelta(hours=24)
  ).all()

Step 2: Batch Sentiment Analysis
  FOR each article in articles:
    sentiment = calculate_sentiment(article.content)
    article.sentiment_score = sentiment
    article.sentiment_label = categorize_sentiment(sentiment)

Step 3: Extract Keywords
  all_text = [a.content for a in articles]
  keywords = extract_keywords(all_text, top_n=20)

Step 4: Detect Themes
  themes = detect_themes([a.headline + ' ' + a.content for a in articles])

Step 5: Calculate Ticker Buzz
  unique_tickers = set(a.ticker for a in articles if a.ticker)
  
  buzz_scores = {}
  FOR ticker in unique_tickers:
    buzz_scores[ticker] = calculate_ticker_buzz(ticker, articles)

Step 6: Aggregate Results
  return {
    "total_articles": len(articles),
    "sentiment_distribution": count_by_sentiment(articles),
    "top_keywords": keywords,
    "trending_themes": themes,
    "ticker_buzz": buzz_scores,
    "timestamp": datetime.now()
  }
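
count_by_sentiment in Step 6 is assumed; a minimal sketch, relying on the sentiment_label field assigned in Step 2:

from collections import Counter
from typing import Dict, List

def count_by_sentiment(articles: List[NewsArticle]) -> Dict[str, int]:
    """Tally articles per sentiment label, e.g. {"POSITIVE": 89, ...}"""
    return dict(Counter(a.sentiment_label for a in articles))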

Output Format

{
  "analysis_timestamp": "2025-12-21T13:00:00Z",
  "timeframe": "last_24_hours",
  "total_articles_analyzed": 237,
  
  "sentiment_distribution": {
    "VERY_POSITIVE": 45,
    "POSITIVE": 89,
    "NEUTRAL": 67,
    "NEGATIVE": 28,
    "VERY_NEGATIVE": 8
  },
  
  "market_sentiment_summary": {
    "overall_score": 0.32,
    "overall_label": "POSITIVE",
    "confidence": 0.85,
    "interpretation": "시장 전반적으로 긍정적 뉴스 우세"
  },
  
  "top_keywords": [
    {
      "keyword": "ai growth",
      "frequency": 67,
      "importance_score": 0.92
    },
    {
      "keyword": "earnings beat",
      "frequency": 54,
      "importance_score": 0.88
    },
    {
      "keyword": "fed rate",
      "frequency": 48,
      "importance_score": 0.85
    },
    {
      "keyword": "semiconductor",
      "frequency": 42,
      "importance_score": 0.80
    },
    {
      "keyword": "tech rally",
      "frequency": 38,
      "importance_score": 0.75
    }
  ],
  
  "trending_themes": [
    {
      "theme_id": 0,
      "theme_name": "AI 붐",
      "keywords": ["ai", "chip", "nvidia", "demand", "growth"],
      "article_count": 78,
      "avg_sentiment": 0.68,
      "interpretation": "AI 관련 긍정적 뉴스 주도"
    },
    {
      "theme_id": 1,
      "theme_name": "Fed 금리 논의",
      "keywords": ["fed", "rate", "inflation", "policy", "powell"],
      "article_count": 56,
      "avg_sentiment": 0.12,
      "interpretation": "금리 관련 중립적 논의"
    },
    {
      "theme_id": 2,
      "theme_name": "실적 시즌",
      "keywords": ["earnings", "beat", "guidance", "revenue", "profit"],
      "article_count": 43,
      "avg_sentiment": 0.45,
      "interpretation": "실적 호조 뉴스 다수"
    }
  ],
  
  "ticker_buzz_rankings": [
    {
      "rank": 1,
      "ticker": "NVDA",
      "buzz_score": 92,
      "mention_count": 45,
      "avg_sentiment": 0.75,
      "trending": "UP",
      "summary": "AI 수요 급증 관련 압도적 언급"
    },
    {
      "rank": 2,
      "ticker": "AAPL",
      "buzz_score": 78,
      "mention_count": 38,
      "avg_sentiment": 0.58,
      "trending": "UP",
      "summary": "iPhone 판매 호조 뉴스"
    },
    {
      "rank": 3,
      "ticker": "TSLA",
      "buzz_score": 65,
      "mention_count": 32,
      "avg_sentiment": -0.25,
      "trending": "UP",
      "summary": "가격 인하 관련 우려 섞인 논의"
    }
  ],
  
  "sector_sentiment": {
    "Technology": {
      "article_count": 128,
      "avg_sentiment": 0.52,
      "label": "POSITIVE",
      "top_tickers": ["NVDA", "AAPL", "MSFT"]
    },
    "Finance": {
      "article_count": 45,
      "avg_sentiment": 0.18,
      "label": "NEUTRAL",
      "top_tickers": ["JPM", "BAC", "GS"]
    },
    "Healthcare": {
      "article_count": 34,
      "avg_sentiment": 0.35,
      "label": "POSITIVE",
      "top_tickers": ["JNJ", "PFE", "MRNA"]
    }
  },
  
  "alerts": [
    {
      "type": "HIGH_BUZZ",
      "ticker": "NVDA",
      "message": "NVDA buzz score 92 (매우 높음)",
      "severity": "INFO"
    },
    {
      "type": "SENTIMENT_SPIKE",
      "theme": "AI 붐",
      "message": "AI 관련 뉴스 sentiment +0.68 (매우 긍정)",
      "severity": "INFO"
    }
  ]
}

Examples

Example 1: Tech Rally Day

Input: 237 articles (last 24h)

Output:
- Overall Sentiment: +0.45 (POSITIVE)
- Top Theme: "AI Growth" (78 articles)
- Top Buzz: NVDA (92), AAPL (78), MSFT (65)
- Keywords: "ai growth", "earnings beat", "chip demand"

Example 2: Market Correction Day

Input: 189 articles

Output:
- Overall Sentiment: -0.38 (NEGATIVE)
- Top Theme: "Fed Rate Hike Fears" (92 articles)
- Top Buzz: SPY (88), VIX (76), TLT (54)
- Keywords: "rate hike", "inflation", "recession fears"

Guidelines

Do's ✅

  • Batch processing: maximize throughput and efficiency
  • Ticker buzz tracking: gauge market attention
  • Theme detection: surface hidden patterns
  • Sector breakdown: sentiment per sector

Don'ts ❌

  • Don't analyze single articles in isolation (that is the Quick/Deep Reasoning role)
  • Don't over-fragment themes (keep to 5 or fewer)
  • Don't over-trust buzz scores (quality over quantity)
  • Don't ignore historical context

Integration

Batch Processing Endpoint

from datetime import datetime, timedelta

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

router = APIRouter()

@router.post("/api/news/batch-analyze")
async def batch_analyze_news(
    timeframe_hours: int = 24,
    db: Session = Depends(get_db)
):
    """Batch analyze recent news"""
    
    # Fetch articles
    cutoff = datetime.now() - timedelta(hours=timeframe_hours)
    articles = db.query(NewsArticle).filter(
        NewsArticle.created_at >= cutoff
    ).all()
    
    # Run News Intelligence Agent
    agent = NewsIntelligenceAgent()
    
    result = await agent.execute({
        'articles': articles,
        'timeframe_hours': timeframe_hours
    })
    
    return result
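
The endpoint can be exercised with any HTTP client; a sketch using httpx, where host and port are assumptions:

import httpx

# timeframe_hours is a query parameter on the POST route above
response = httpx.post(
    "http://localhost:8000/api/news/batch-analyze",
    params={"timeframe_hours": 24},
    timeout=30.0,  # batch analysis can take several seconds
)
print(response.json())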

Real-Time Updates (WebSocket)

import asyncio

from fastapi import WebSocket, WebSocketDisconnect

@router.websocket("/ws/news-intel")
async def news_intel_websocket(websocket: WebSocket):
    """Stream news intelligence updates"""
    
    await websocket.accept()
    
    try:
        while True:
            # Depends() is only resolved for HTTP requests, so obtain
            # a session explicitly for each analysis pass
            db = next(get_db())
            try:
                result = await batch_analyze_news(timeframe_hours=1, db=db)
            finally:
                db.close()
            
            await websocket.send_json(result)
            
            await asyncio.sleep(300)  # re-run every 5 minutes
    except WebSocketDisconnect:
        pass  # client disconnected; stop streaming
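
A matching client sketch using the third-party websockets package (the URL is an assumption):

import asyncio
import json

import websockets  # pip install websockets

async def consume_news_intel():
    async with websockets.connect("ws://localhost:8000/ws/news-intel") as ws:
        while True:
            update = json.loads(await ws.recv())
            print(update.get("total_articles"), "articles in latest update")

asyncio.run(consume_news_intel())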

Performance Metrics

  • Batch Processing Speed: target < 10 s for 100 articles
  • Sentiment Accuracy: > 80%
  • Theme Detection Quality: > 75% agreement with human judgment
  • Ticker Buzz Precision: > 85%

Comparison

Agent              Scope                  Speed   Use Case
News Intelligence  Batch (100+ articles)  10 s    Overall market flow
Quick Analyzer     Single ticker          5 s     Individual stock check
Deep Reasoning     Single news article    30 s    Deep dive on a major story

Version History

  • v1.0 (2025-12-21): Initial release with batch processing and theme detection