Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,41 @@

After each successfully executed programming instruction, the app must be built and submitted in the following way:

If you are not Jules, or if the Android SDK is not already installed, then you will need to install it first:

```
#!/bin/bash
#
# Bootstraps a project-local Android SDK under ./android_sdk, points
# local.properties at it, and installs the packages needed to build the app.
# Safe to re-run: the command-line tools are replaced with the fresh download.

# Exit on any error
set -e

# 1. Set up the Android SDK directory
echo "Setting up Android SDK directory..."
mkdir -p android_sdk
echo "sdk.dir=android_sdk" > local.properties

# Add android_sdk to .gitignore if it's not already there.
# -s keeps grep quiet when .gitignore does not exist yet; -F matches literally.
if ! grep -qsF "android_sdk/" .gitignore; then
    echo "android_sdk/" >> .gitignore
fi

# 2. Download and unzip the Android SDK command-line tools
echo "Downloading and unzipping Android SDK..."
wget -q https://dl.google.com/android/repository/commandlinetools-linux-13114758_latest.zip
# -o overwrites leftovers from an interrupted run instead of blocking on a prompt.
unzip -q -o commandlinetools-linux-13114758_latest.zip -d android_sdk
rm commandlinetools-linux-13114758_latest.zip

# 3. Restructure cmdline-tools for sdkmanager
# sdkmanager expects to live in cmdline-tools/latest/, but the archive unpacks
# straight into cmdline-tools/. Move everything except "latest" itself, so mv is
# never asked to move a directory into itself and real failures stay visible.
echo "Restructuring cmdline-tools..."
rm -rf android_sdk/cmdline-tools/latest
mkdir -p android_sdk/cmdline-tools/latest
find android_sdk/cmdline-tools -mindepth 1 -maxdepth 1 ! -name latest \
    -exec mv -t android_sdk/cmdline-tools/latest {} +

# 4. Install required SDK packages
echo "Installing SDK packages..."
# "yes" auto-accepts every licence prompt.
yes | android_sdk/cmdline-tools/latest/bin/sdkmanager --licenses > /dev/null
android_sdk/cmdline-tools/latest/bin/sdkmanager "platforms;android-35" "build-tools;35.0.0" "platform-tools"
```

### 1. Build the Unsigned APK

echo "Building the application..."
Expand Down
Binary file modified app-release-signed.apk
Binary file not shown.
Binary file modified app-release-signed.apk.idsig
Binary file not shown.
9 changes: 8 additions & 1 deletion app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,14 @@ fun MenuScreen(
.padding(horizontal = 16.dp, vertical = 8.dp)
) {
val annotatedText = buildAnnotatedString {
append("Preview models could be deactivated by Google without being handed over to the final release. Gemma 3n E4B it cannot handle screenshots in the API. There are rate limits for free use of Gemini models. The less powerful the models are, the more you can use them. The limits range from a maximum of 5 to 30 calls per minute. After each screenshot (every 2-3 seconds) the LLM must respond again. More information is available at ")
append("• Preview models could be deactivated by Google without being handed over to the final release.\\n")
append("• GPT-oss 120b is a pure text model.\\n")
append("• Gemma 3n E4B it cannot handle screenshots in the API.\\n")
append("• GPT models (Vercel) have a free budget of $5 per month.\\n")
append("GPT-5.1 Input: $1.25/M Output: $10.00/M\\n")
append("GPT-5.1 mini Input: $0.25/ M Output: $2.00/M\\n")
append("GPT-5 nano Input: $0.05/M Output: $0.40/M\\n")
append("• There are rate limits for free use of Gemini models. The less powerful the models are, the more you can use them. The limits range from a maximum of 5 to 30 calls per minute. After each screenshot (every 2-3 seconds) the LLM must respond again. More information is available at ")

pushStringAnnotation(tag = "URL", annotation = "https://ai.google.dev/gemini-api/docs/rate-limits")
withStyle(style = SpanStyle(color = MaterialTheme.colorScheme.primary, textDecoration = TextDecoration.Underline)) {
Expand Down
157 changes: 144 additions & 13 deletions app/src/main/kotlin/com/google/ai/sample/ScreenCaptureService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,20 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.launch
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.decodeFromString
import kotlinx.serialization.MissingFieldException
import kotlinx.serialization.json.JsonClassDiscriminator
import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.modules.polymorphic
import androidx.core.app.NotificationCompat
import androidx.localbroadcastmanager.content.LocalBroadcastManager
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import java.io.File
import java.io.FileOutputStream
import java.text.SimpleDateFormat
Expand All @@ -70,6 +79,7 @@ class ScreenCaptureService : Service() {
const val EXTRA_AI_CHAT_HISTORY_JSON = "com.google.ai.sample.EXTRA_AI_CHAT_HISTORY_JSON"
const val EXTRA_AI_MODEL_NAME = "com.google.ai.sample.EXTRA_AI_MODEL_NAME" // For service to create model
const val EXTRA_AI_API_KEY = "com.google.ai.sample.EXTRA_AI_API_KEY" // For service to create model
const val EXTRA_AI_API_PROVIDER = "com.google.ai.sample.EXTRA_AI_API_PROVIDER" // For service to select API
const val EXTRA_TEMP_FILE_PATHS = "com.google.ai.sample.EXTRA_TEMP_FILE_PATHS"


Expand Down Expand Up @@ -189,6 +199,8 @@ class ScreenCaptureService : Service() {
val chatHistoryJson = intent.getStringExtra(EXTRA_AI_CHAT_HISTORY_JSON)
val modelName = intent.getStringExtra(EXTRA_AI_MODEL_NAME)
val apiKey = intent.getStringExtra(EXTRA_AI_API_KEY)
val apiProviderString = intent.getStringExtra(EXTRA_AI_API_PROVIDER)
val apiProvider = ApiProvider.valueOf(apiProviderString ?: ApiProvider.GOOGLE.name)
val tempFilePaths = intent.getStringArrayListExtra(EXTRA_TEMP_FILE_PATHS) ?: ArrayList()
Log.d(TAG, "Received tempFilePaths for cleanup: $tempFilePaths")

Expand Down Expand Up @@ -253,22 +265,28 @@ class ScreenCaptureService : Service() {
}
}
try {
val generativeModel = GenerativeModel(
modelName = modelName,
apiKey = apiKey
)
val tempChat = generativeModel.startChat(history = chatHistory)
val fullResponse = StringBuilder()
tempChat.sendMessageStream(inputContent).collect { chunk ->
chunk.text?.let {
fullResponse.append(it)
val streamIntent = Intent(ACTION_AI_STREAM_UPDATE).apply {
putExtra(EXTRA_AI_STREAM_CHUNK, it)
if (apiProvider == ApiProvider.VERCEL) {
val result = callVercelApi(modelName, apiKey, chatHistory, inputContent)
responseText = result.first
errorMessage = result.second
} else {
val generativeModel = GenerativeModel(
modelName = modelName,
apiKey = apiKey
)
val tempChat = generativeModel.startChat(history = chatHistory)
val fullResponse = StringBuilder()
tempChat.sendMessageStream(inputContent).collect { chunk ->
chunk.text?.let {
fullResponse.append(it)
val streamIntent = Intent(ACTION_AI_STREAM_UPDATE).apply {
putExtra(EXTRA_AI_STREAM_CHUNK, it)
}
LocalBroadcastManager.getInstance(applicationContext).sendBroadcast(streamIntent)
}
LocalBroadcastManager.getInstance(applicationContext).sendBroadcast(streamIntent)
}
responseText = fullResponse.toString()
}
responseText = fullResponse.toString()
} catch (e: MissingFieldException) {
Log.e(TAG, "Serialization error, potentially a 503 error.", e)
// Check if the error message indicates a 503-like error
Expand Down Expand Up @@ -680,3 +698,116 @@ class ScreenCaptureService : Service() {

override fun onBind(intent: Intent?): IBinder? = null
}

// Data classes for Vercel API
/**
 * JSON request body that [callVercelApi] serializes and POSTs to the chat-completions endpoint.
 *
 * @property model Identifier of the model the request is addressed to.
 * @property messages Conversation turns to send; [callVercelApi] passes the chat history followed by the new input.
 */
@Serializable
data class VercelRequest(
val model: String,
val messages: List<VercelMessage>
)

/**
 * One conversation turn inside a [VercelRequest].
 *
 * @property role Author of the turn; [callVercelApi] sets this to either "user" or "assistant".
 * @property content Parts (text and/or images) that make up the turn.
 */
@Serializable
data class VercelMessage(
val role: String,
val content: List<VercelContent>
)

/**
 * Subset of the chat-completions response body that [callVercelApi] decodes.
 *
 * @property choices Completions returned by the API; [callVercelApi] reads only the first one.
 */
@Serializable
data class VercelResponse(
val choices: List<VercelChoice>
)

/**
 * A single entry of [VercelResponse.choices].
 *
 * @property message The message generated for this choice.
 */
@Serializable
data class VercelChoice(
val message: VercelResponseMessage
)

/**
 * Message carried by a [VercelChoice].
 *
 * NOTE(review): both properties are non-nullable and have no defaults, so decoding fails when the
 * response omits either field or sends `null` for it — confirm the API always returns a string `content`.
 *
 * @property role Author of the message as reported by the API.
 * @property content Plain-text body of the message.
 */
@Serializable
data class VercelResponseMessage(
val role: String,
val content: String
)

/**
 * Base type for the parts of a [VercelMessage].
 *
 * Serialized polymorphically: the JSON field "type" carries the subclass's [SerialName]
 * ("text" for [VercelTextContent], "image_url" for [VercelImageContent]).
 */
@Serializable
@JsonClassDiscriminator("type")
sealed class VercelContent

/**
 * Text part of a message; serialized with type "text".
 *
 * @property text The text to send.
 */
@Serializable
@SerialName("text")
data class VercelTextContent(val text: String) : VercelContent()

/**
 * Image part of a message; serialized with type "image_url".
 *
 * @property image_url Wrapper holding the image location. The snake_case name is the JSON key, so it must not be renamed.
 */
@Serializable
@SerialName("image_url")
data class VercelImageContent(val image_url: VercelImageUrl) : VercelContent()

/**
 * Location of an image referenced by a [VercelImageContent].
 *
 * @property url Image URL; [callVercelApi] fills this with a base64 `data:image/jpeg` URL built from the bitmap.
 */
@Serializable
data class VercelImageUrl(val url: String)

/**
 * Encodes this bitmap as a JPEG (quality 80) and returns it as a `data:image/jpeg;base64,...` URL.
 *
 * The payload is encoded with [android.util.Base64.NO_WRAP]: the default flag inserts line breaks
 * into the output, which would corrupt the data URL once it is embedded in a JSON request.
 *
 * @return The data URL containing the base64-encoded JPEG bytes on a single line.
 */
private fun Bitmap.toBase64(): String {
    val jpegBytes = java.io.ByteArrayOutputStream()
    compress(Bitmap.CompressFormat.JPEG, 80, jpegBytes)
    val encoded = android.util.Base64.encodeToString(jpegBytes.toByteArray(), android.util.Base64.NO_WRAP)
    return "data:image/jpeg;base64,$encoded"
}

/**
 * Sends the conversation to the Vercel chat-completions endpoint and returns the model's reply.
 *
 * The blocking HTTP call runs on [kotlinx.coroutines.Dispatchers.IO], so this function is safe to
 * call from any dispatcher. Coroutine cancellation is propagated instead of being reported as an
 * error; every other failure is converted into an error message.
 *
 * @param modelName Model identifier placed in the request body.
 * @param apiKey Key sent as a `Bearer` token in the `Authorization` header.
 * @param chatHistory Earlier turns of the conversation, oldest first.
 * @param inputContent The new turn, appended after [chatHistory].
 * @return A pair of (response text, error message). On success the first component holds the reply
 *   and the second is `null`; on failure the first is `null` and the second describes the problem.
 */
private suspend fun callVercelApi(
    modelName: String,
    apiKey: String,
    chatHistory: List<Content>,
    inputContent: Content
): Pair<String?, String?> = kotlinx.coroutines.withContext(kotlinx.coroutines.Dispatchers.IO) {
    // One Json instance serves both directions: the module is needed to encode the polymorphic
    // VercelContent parts, ignoreUnknownKeys to decode responses carrying extra fields.
    val json = Json {
        serializersModule = SerializersModule {
            polymorphic(VercelContent::class) {
                subclass(VercelTextContent::class, VercelTextContent.serializer())
                subclass(VercelImageContent::class, VercelImageContent.serializer())
            }
        }
        ignoreUnknownKeys = true
    }

    val result: Pair<String?, String?> = try {
        val messages = (chatHistory + inputContent).map { content ->
            val parts = content.parts.map { part ->
                when (part) {
                    is TextPart -> VercelTextContent(text = part.text)
                    is ImagePart -> VercelImageContent(image_url = VercelImageUrl(url = part.image.toBase64()))
                    else -> VercelTextContent(text = "") // Or handle other part types appropriately
                }
            }
            // Anything that is not a user turn is sent as an assistant turn.
            VercelMessage(role = if (content.role == "user") "user" else "assistant", content = parts)
        }

        val requestBody = VercelRequest(
            model = modelName,
            messages = messages
        )

        val client = OkHttpClient()
        val mediaType = "application/json".toMediaType()
        val jsonBody = json.encodeToString(VercelRequest.serializer(), requestBody)

        val request = Request.Builder()
            .url("https://api.vercel.ai/v1/chat/completions")
            .post(jsonBody.toRequestBody(mediaType))
            .addHeader("Content-Type", "application/json")
            .addHeader("Authorization", "Bearer $apiKey")
            .build()

        // use { } closes the response (and its body) on every path.
        client.newCall(request).execute().use { response ->
            val responseBody = response.body?.string()
            when {
                !response.isSuccessful ->
                    Pair<String?, String?>(null, "Unexpected code ${response.code} - $responseBody")
                responseBody == null ->
                    Pair<String?, String?>(null, "Empty response body")
                else -> {
                    val vercelResponse = json.decodeFromString(VercelResponse.serializer(), responseBody)
                    val text = vercelResponse.choices.firstOrNull()?.message?.content ?: "No response from model"
                    Pair<String?, String?>(text, null)
                }
            }
        }
    } catch (e: kotlinx.coroutines.CancellationException) {
        // Cancellation is not an API failure; rethrow so the caller's coroutine stops promptly.
        throw e
    } catch (e: Exception) {
        Pair<String?, String?>(null, e.localizedMessage ?: "Vercel API call failed")
    }

    result
}
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ class PhotoReasoningViewModel(
putExtra(ScreenCaptureService.EXTRA_AI_API_KEY, apiKey)
// Add the new extra for file paths
putStringArrayListExtra(ScreenCaptureService.EXTRA_TEMP_FILE_PATHS, tempFilePaths)
putExtra(ScreenCaptureService.EXTRA_AI_API_PROVIDER, currentModel.apiProvider.name)
}
context.startService(serviceIntent)
Log.d(TAG, "sendMessageWithRetry: Sent intent to ScreenCaptureService to execute AI call.")
Expand Down
Empty file modified build_and_sign.sh
100644 → 100755
Empty file.