import org.llm4s.llmconnect.LLMClient
import org.llm4s.llmconnect.model._
import org.llm4s.types.Result
import org.llm4s.error.NetworkError
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class WeatherAgentSpec extends AnyFlatSpec with Matchers {

  // Mock client that returns canned responses
  class MockLLMClient extends LLMClient {
    override def complete(
      conversation: Conversation,
      options: CompletionOptions = CompletionOptions()
    ): Result[Completion] = {
      Right(Completion(
        id = "mock-1",
        created = System.currentTimeMillis(),
        content = "The weather in London is 15°C and cloudy.",
        model = "mock-model",
        message = AssistantMessage("The weather in London is 15°C and cloudy."),
        usage = Some(TokenUsage(promptTokens = 10, completionTokens = 15, totalTokens = 25))
      ))
    }

    override def streamComplete(
      conversation: Conversation,
      options: CompletionOptions = CompletionOptions(),
      onChunk: StreamedChunk => Unit
    ): Result[Completion] = {
      val chunks = List(
        StreamedChunk(content = Some("The weather"), finishReason = None),
        StreamedChunk(content = Some(" is sunny"), finishReason = Some("stop"))
      )
      chunks.foreach(onChunk)
      Right(Completion(
        id = "mock-1",
        created = System.currentTimeMillis(),
        content = "The weather is sunny",
        model = "mock-model",
        message = AssistantMessage("The weather is sunny")
      ))
    }
  }

  // Example agent that uses the LLM client
  class WeatherAgent(client: LLMClient) {
    def run(query: String): Result[Completion] =
      client.complete(Conversation(Seq(UserMessage(query))))
  }

  "WeatherAgent" should "extract city from user query" in {
    val agent  = new WeatherAgent(new MockLLMClient)
    val result = agent.run("What's the weather in London?")

    result match {
      case Right(response) =>
        response.content should include("London")
        response.content should include("°C")
      case Left(error) =>
        fail(s"Expected success but got: $error")
    }
  }

  it should "handle errors gracefully" in {
    class FailingMockClient extends LLMClient {
      override def complete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions()
      ): Result[Completion] =
        Left(NetworkError("Connection timeout"))

      override def streamComplete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions(),
        onChunk: StreamedChunk => Unit
      ): Result[Completion] =
        Left(NetworkError("Connection timeout"))
    }

    val agent  = new WeatherAgent(new FailingMockClient)
    val result = agent.run("What's the weather?")

    result match {
      case Left(_: NetworkError) => succeed
      case other                 => fail(s"Expected NetworkError but got: $other")
    }
  }
}
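When different prompts need different canned answers, a configurable mock keyed on the incoming user message keeps the test setup declarative: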
import org.llm4s.error.InvalidRequestError

class ConfigurableMockClient(responses: Map[String, String]) extends LLMClient {
  override def complete(
    conversation: Conversation,
    options: CompletionOptions = CompletionOptions()
  ): Result[Completion] = {
    val userMessage = conversation.messages.collectFirst {
      case UserMessage(content) => content
    }.getOrElse("")

    responses.get(userMessage) match {
      case Some(responseText) =>
        Right(Completion(
          id = "mock-1",
          created = System.currentTimeMillis(),
          content = responseText,
          model = "mock-model",
          message = AssistantMessage(responseText)
        ))
      case None =>
        Left(InvalidRequestError(s"No mock response for: $userMessage"))
    }
  }

  override def streamComplete(
    conversation: Conversation,
    options: CompletionOptions = CompletionOptions(),
    onChunk: StreamedChunk => Unit
  ): Result[Completion] = {
    complete(conversation, options).map { completion =>
      onChunk(StreamedChunk(content = Some(completion.content), finishReason = Some("stop")))
      completion
    }
  }
}

// Usage in tests
val mockResponses = Map(
  "What is 2+2?"                   -> "2+2 equals 4",
  "What is the capital of France?" -> "The capital of France is Paris"
)
val client = new ConfigurableMockClient(mockResponses)
Integration Testing with Ollama
For integration tests, use Ollama to avoid API costs:
Setup
# Install Ollama
curl -fsSL https://ollama.com/install.sh | sh
# Pull a small, fast model
ollama pull llama3.2
# Start server
ollama serve
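Then point LLM4S at the local server before running the integration suite. The model id below mirrors the LLM_MODEL value used in the CI workflow later on this page; the base-URL variable name is an assumption, so verify it against your provider configuration docs:

# Environment for the integration tests (variable names are assumptions - check your config docs)
export LLM_MODEL=ollama/llama3.2
export OLLAMA_BASE_URL=http://localhost:11434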
import org.llm4s.config.Llm4sConfig
import org.llm4s.llmconnect.LLMConnect
import org.llm4s.llmconnect.model._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class LLMIntegrationSpec extends AnyFlatSpec with Matchers {

  // Only run if Ollama is available
  def ollamaAvailable: Boolean = {
    try {
      val url        = new java.net.URL("http://localhost:11434")
      val connection = url.openConnection()
      connection.setConnectTimeout(1000)
      connection.connect()
      true
    } catch {
      case _: Exception => false
    }
  }

  "LLMClient" should "complete basic requests" in {
    assume(ollamaAvailable, "Ollama server not available")

    val result = for {
      config   <- Llm4sConfig.provider()
      client   <- LLMConnect.getClient(config)
      response <- client.complete(Conversation(Seq(UserMessage("Say 'hello' and nothing else"))))
    } yield response

    result match {
      case Right(response) =>
        response.content.toLowerCase should include("hello")
      case Left(error) =>
        fail(s"Request failed: $error")
    }
  }

  it should "handle streaming responses" in {
    assume(ollamaAvailable, "Ollama server not available")

    var chunks = List.empty[StreamedChunk]

    val result = for {
      config     <- Llm4sConfig.provider()
      client     <- LLMConnect.getClient(config)
      completion <- client.streamComplete(Conversation(Seq(UserMessage("Count: 1, 2, 3")))) { chunk =>
                      chunks = chunks :+ chunk
                    }
    } yield completion

    result match {
      case Right(completion) =>
        chunks should not be empty
        chunks.last.finishReason should be(Some("stop"))
      case Left(error) =>
        fail(s"Streaming failed: $error")
    }
  }
}
import org.llm4s.error.{RateLimitError, AuthenticationError, NetworkError}

class ErrorHandlingSpec extends AnyFlatSpec with Matchers {

  "Agent" should "handle rate limiting" in {
    class RateLimitedClient extends LLMClient {
      override def complete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions()
      ): Result[Completion] =
        Left(RateLimitError("Rate limit exceeded"))

      override def streamComplete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions(),
        onChunk: StreamedChunk => Unit
      ): Result[Completion] =
        Left(RateLimitError("Rate limit exceeded"))
    }

    val agent  = new Agent(new RateLimitedClient)
    val result = agent.run("test query", tools = ToolRegistry.empty)

    result match {
      case Left(_: RateLimitError) => succeed
      case other                   => fail(s"Expected RateLimitError but got: $other")
    }
  }

  it should "handle authentication errors" in {
    class UnauthorizedClient extends LLMClient {
      override def complete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions()
      ): Result[Completion] =
        Left(AuthenticationError("Invalid API key"))

      override def streamComplete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions(),
        onChunk: StreamedChunk => Unit
      ): Result[Completion] =
        Left(AuthenticationError("Invalid API key"))
    }

    val agent  = new Agent(new UnauthorizedClient)
    val result = agent.run("test", tools = ToolRegistry.empty)

    result.isLeft shouldBe true
  }

  it should "handle network timeouts" in {
    class TimeoutClient extends LLMClient {
      override def complete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions()
      ): Result[Completion] = {
        Thread.sleep(5000) // Simulate timeout
        Left(NetworkError("Request timeout"))
      }

      override def streamComplete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions(),
        onChunk: StreamedChunk => Unit
      ): Result[Completion] =
        Left(NetworkError("Request timeout"))
    }

    val agent  = new Agent(new TimeoutClient)
    val result = agent.run("test", tools = ToolRegistry.empty)

    result match {
      case Left(_: NetworkError) => succeed
      case other                 => fail(s"Expected NetworkError but got: $other")
    }
  }
}
Testing Tool Calling
Test that tools are invoked correctly.
Note: This example uses a simplified Tool API for clarity. In production, use ToolBuilder and ToolFunction from the org.llm4s.toolapi package; see the Tools documentation for the actual API.
class ToolCallingSpec extends AnyFlatSpec with Matchers {

  "Agent" should "invoke weather tool" in {
    var toolWasCalled = false
    var capturedCity: Option[String] = None

    // Simplified tool example for testing concepts
    val weatherTool = new Tool {
      override def name: String        = "get_weather"
      override def description: String = "Get weather for a city"
      override def parameters: ToolParameters = ToolParameters(
        properties = Map("city" -> Property("string", "City name"))
      )
      override def execute(args: Map[String, Any]): Result[String] = {
        toolWasCalled = true
        capturedCity = args.get("city").map(_.toString)
        Right(s"Weather in ${capturedCity.getOrElse("unknown")}: 20°C")
      }
    }

    // Mock client that calls the tool
    class ToolCallingMock extends LLMClient {
      override def complete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions()
      ): Result[Completion] =
        Right(Completion(
          id = "mock-1",
          created = System.currentTimeMillis(),
          content = "",
          model = "mock-model",
          message = AssistantMessage(""),
          toolCalls = List(ToolCall(
            id = "call_1",
            name = "get_weather",
            arguments = Map("city" -> "London")
          ))
        ))

      override def streamComplete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions(),
        onChunk: StreamedChunk => Unit
      ): Result[Completion] =
        complete(conversation, options)
    }

    val tools = new ToolRegistry(List(weatherTool))
    val agent = new Agent(new ToolCallingMock)

    agent.run("What's the weather in London?", tools)

    toolWasCalled shouldBe true
    capturedCity shouldBe Some("London")
  }
}
Testing RAG Applications
Test document retrieval and answer generation separately:
class RAGSpec extends AnyFlatSpec with Matchers {

  "VectorStore" should "retrieve relevant documents" in {
    val documents = List(
      "Scala is a functional programming language",
      "Python is a dynamically typed language",
      "Java runs on the JVM"
    )

    // Note: This is conceptual pseudocode showing testing patterns.
    // LLM4S does not currently include vector store implementations.
    // Use your preferred vector store library (e.g., Pinecone, Milvus, ChromaDB).
    val vectorStore = new InMemoryVectorStore() // Pseudocode - use your vector store
    documents.foreach(doc => vectorStore.add(doc, embedder.embed(doc))) // embedder is conceptual

    val results = vectorStore.search("functional programming", topK = 1)
    results.head should include("Scala")
  }

  "RAG pipeline" should "include context in LLM prompt" in {
    class RAGMockClient extends LLMClient {
      var lastPrompt: Option[String] = None

      override def complete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions()
      ): Result[Completion] = {
        lastPrompt = conversation.messages.collectFirst {
          case UserMessage(content) => content
        }
        Right(Completion(
          id = "mock-1",
          created = System.currentTimeMillis(),
          content = "Based on the context, Scala is functional.",
          model = "mock-model",
          message = AssistantMessage("Based on the context, Scala is functional.")
        ))
      }

      override def streamComplete(
        conversation: Conversation,
        options: CompletionOptions = CompletionOptions(),
        onChunk: StreamedChunk => Unit
      ): Result[Completion] =
        complete(conversation, options)
    }

    val mockClient = new RAGMockClient
    // RAGPipeline, vectorStore, and embedder are conceptual stand-ins (see the note above)
    val rag = new RAGPipeline(mockClient, vectorStore, embedder)

    rag.query("What is Scala?")

    mockClient.lastPrompt.get should include("context")
    mockClient.lastPrompt.get should include("functional")
  }
}
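The same split between fast mock-based specs and Ollama-backed integration specs carries over to CI, for example with GitHub Actions: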
# .github/workflows/test.yml
name: Test

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Setup Scala
        uses: olafurpg/setup-scala@v14
        with:
          java-version: 21

      - name: Install Ollama
        run: |
          curl -fsSL https://ollama.com/install.sh | sh
          ollama serve &
          sleep 5
          ollama pull llama3.2

      - name: Run unit tests (fast)
        run: sbt "testOnly *UnitSpec"

      - name: Run integration tests (with Ollama)
        run: sbt "testOnly *IntegrationSpec"
        env:
          LLM_MODEL: ollama/llama3.2

      # Skip expensive tests in CI
      - name: Run full test suite
        run: sbt test
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
Test Categorization
// Tag tests by speed/cost
import org.scalatest.Tag

object UnitTest extends Tag("UnitTest")
object IntegrationTest extends Tag("IntegrationTest")
object ExpensiveTest extends Tag("ExpensiveTest")

class FastSpec extends AnyFlatSpec {
  "Fast unit test" should "run in CI" taggedAs UnitTest in {
    // Mock-based test
  }
}

class SlowSpec extends AnyFlatSpec {
  "Expensive test" should "run manually" taggedAs ExpensiveTest in {
    // Uses real OpenAI API
  }
}
Run specific test categories:
# Fast tests only
sbt "testOnly * -- -n UnitTest"

# Everything except expensive tests
sbt "testOnly * -- -l ExpensiveTest"
Best Practices
✅ Mock by default: Use mock clients for unit tests
✅ Ollama for integration: Free and fast enough for CI
✅ Test behaviors, not outputs: LLM responses vary, so test that tools are called, documents are retrieved, etc.
✅ Use deterministic models when possible: Set temperature=0 for more predictable outputs (see the sketch after this list)
✅ Separate concerns: Test tool logic independently from LLM integration
✅ Tag expensive tests: Don’t run them in every CI build
✅ Use smaller models in CI: llama3.2 is fast and free via Ollama
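A minimal sketch of the temperature=0 recommendation, assuming CompletionOptions exposes a temperature field (verify the exact field name and type against the LLM4S version you use):

import org.llm4s.llmconnect.LLMClient
import org.llm4s.llmconnect.model._
import org.llm4s.types.Result

// Hypothetical helper: pin temperature to 0 for more repeatable completions.
// `temperature` as a CompletionOptions field is an assumption - check the actual case class.
def deterministicComplete(client: LLMClient, prompt: String): Result[Completion] =
  client.complete(
    Conversation(Seq(UserMessage(prompt))),
    CompletionOptions(temperature = 0.0)
  )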