Skip to content

Commit be40883

Browse files
committed
feat(agent): Improve PhoneAgent stability and add research docs
- Enhance PhoneAgent with better error handling and retry logic
- Refactor input handling (KeyboardHelper, TextInputManager)
- Clean up unused code across multiple modules
- Add multi-model agent architecture research documentation
- Bump version to 0.0.3
1 parent 9687991 commit be40883

34 files changed

Lines changed: 424 additions & 254 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,5 +55,8 @@ keystore_base64.txt
5555
# Logcat dumps
5656
logcat*
5757

58+
# AutoGLM logs
59+
autoglm_logs_*/
60+
5861
# Dev profiles config
5962
dev_profiles.json

app/build.gradle.kts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ android {
1212
applicationId = "com.kevinluo.autoglm"
1313
minSdk = 24
1414
targetSdk = 34
15-
versionCode = 2
16-
versionName = "0.0.2"
15+
versionCode = 3
16+
versionName = "0.0.3"
1717

1818
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
1919
}

app/src/main/java/com/kevinluo/autoglm/ComponentManager.kt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import com.kevinluo.autoglm.screenshot.ScreenshotService
1717
import com.kevinluo.autoglm.settings.SettingsManager
1818
import com.kevinluo.autoglm.ui.FloatingWindowService
1919
import com.kevinluo.autoglm.util.HumanizedSwipeGenerator
20+
import com.kevinluo.autoglm.util.Logger
2021

2122
/**
2223
* Centralized component manager for dependency injection and lifecycle management.
@@ -27,7 +28,6 @@ import com.kevinluo.autoglm.util.HumanizedSwipeGenerator
2728
* - Lifecycle-aware component management
2829
* - Clean separation of concerns
2930
*
30-
* Requirements: All (integration)
3131
*/
3232
class ComponentManager private constructor(private val context: Context) {
3333

@@ -165,7 +165,7 @@ class ComponentManager private constructor(private val context: Context) {
165165
* @param service The connected UserService
166166
*/
167167
fun onServiceConnected(service: IUserService) {
168-
android.util.Log.i(TAG, "UserService connected, initializing components")
168+
Logger.i(TAG, "UserService connected, initializing components")
169169
userService = service
170170
initializeServiceDependentComponents()
171171
}
@@ -175,7 +175,7 @@ class ComponentManager private constructor(private val context: Context) {
175175
* Cleans up service-dependent components.
176176
*/
177177
fun onServiceDisconnected() {
178-
android.util.Log.i(TAG, "UserService disconnected, cleaning up components")
178+
Logger.i(TAG, "UserService disconnected, cleaning up components")
179179
userService = null
180180
cleanupServiceDependentComponents()
181181
}
@@ -215,7 +215,7 @@ class ComponentManager private constructor(private val context: Context) {
215215
historyManager = historyManager
216216
)
217217

218-
android.util.Log.i(TAG, "All service-dependent components initialized")
218+
Logger.i(TAG, "All service-dependent components initialized")
219219
}
220220

221221
/**
@@ -229,7 +229,7 @@ class ComponentManager private constructor(private val context: Context) {
229229
_textInputManager = null
230230
_deviceExecutor = null
231231

232-
android.util.Log.i(TAG, "Service-dependent components cleaned up")
232+
Logger.i(TAG, "Service-dependent components cleaned up")
233233
}
234234

235235
/**
@@ -238,7 +238,7 @@ class ComponentManager private constructor(private val context: Context) {
238238
*/
239239
fun reinitializeAgent() {
240240
if (userService == null) {
241-
android.util.Log.w(TAG, "Cannot reinitialize agent: UserService not connected")
241+
Logger.w(TAG, "Cannot reinitialize agent: UserService not connected")
242242
return
243243
}
244244

@@ -258,7 +258,7 @@ class ComponentManager private constructor(private val context: Context) {
258258
historyManager = historyManager
259259
)
260260

261-
android.util.Log.i(TAG, "PhoneAgent reinitialized with new configuration")
261+
Logger.i(TAG, "PhoneAgent reinitialized with new configuration")
262262
}
263263

264264
/**
@@ -295,7 +295,7 @@ class ComponentManager private constructor(private val context: Context) {
295295
* Should be called when the application is being destroyed.
296296
*/
297297
fun cleanup() {
298-
android.util.Log.i(TAG, "Cleaning up all components")
298+
Logger.i(TAG, "Cleaning up all components")
299299
cleanupServiceDependentComponents()
300300
_modelClient = null
301301
_appResolver = null

app/src/main/java/com/kevinluo/autoglm/MainActivity.kt

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ import rikka.shizuku.Shizuku
5050
* The activity implements [PhoneAgentListener] to receive callbacks
5151
* during task execution for UI updates.
5252
*
53-
* Requirements: 1.1, 2.1, 2.2
5453
*/
5554
class MainActivity : AppCompatActivity(), PhoneAgentListener {
5655

@@ -390,7 +389,7 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
390389
openShizukuApp()
391390
}
392391

393-
// Settings button - Requirements: 6.1
392+
// Settings button
394393
settingsBtn.setOnClickListener {
395394
startActivity(Intent(this, SettingsActivity::class.java))
396395
}
@@ -416,12 +415,12 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
416415
com.kevinluo.autoglm.input.KeyboardHelper.openInputMethodSettings(this)
417416
}
418417

419-
// Start task button - Requirements: 1.1
418+
// Start task button
420419
startTaskBtn.setOnClickListener {
421420
startTask()
422421
}
423422

424-
// Cancel task button - Requirements: 1.4
423+
// Cancel task button
425424
cancelTaskBtn.setOnClickListener {
426425
cancelTask()
427426
}
@@ -604,7 +603,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
604603
* Called after UserService is connected. Sets up the agent listener
605604
* and confirmation callback for sensitive operations.
606605
*
607-
* Requirements: 1.1, 2.1, 2.2
608606
*/
609607
private fun initializePhoneAgent() {
610608
if (!componentManager.isServiceConnected) {
@@ -667,7 +665,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
667665
* Validates the task description, checks agent state, starts the
668666
* floating window service, and launches the task in a coroutine.
669667
*
670-
* Requirements: 1.1, 2.1, 2.2
671668
*/
672669
private fun startTask() {
673670
val taskDescription = taskInput.text?.toString()?.trim() ?: ""
@@ -779,7 +776,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
779776
* Cancels the agent, resets its state, and updates the UI
780777
* to reflect the cancelled status.
781778
*
782-
* Requirements: 1.1, 2.1, 2.2
783779
*/
784780
private fun cancelTask() {
785781
Logger.i(TAG, "Cancelling task")
@@ -865,7 +861,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
865861
*
866862
* @param status The new task status to display
867863
*
868-
* Requirements: 1.1, 2.1, 2.2
869864
*/
870865
private fun updateTaskStatus(status: TaskStatus) {
871866
val (text, colorRes) = when (status) {
@@ -898,7 +893,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
898893
*
899894
* @param stepNumber The current step number
900895
*
901-
* Requirements: 1.1, 2.1, 2.2
902896
*/
903897
override fun onStepStarted(stepNumber: Int) {
904898
runOnUiThread {
@@ -916,7 +910,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
916910
*
917911
* @param thinking The model's thinking text
918912
*
919-
* Requirements: 1.1, 2.1, 2.2
920913
*/
921914
override fun onThinkingUpdate(thinking: String) {
922915
runOnUiThread {
@@ -931,7 +924,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
931924
*
932925
* @param action The action that was executed
933926
*
934-
* Requirements: 1.1, 2.1, 2.2
935927
*/
936928
override fun onActionExecuted(action: AgentAction) {
937929
runOnUiThread {
@@ -948,7 +940,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
948940
*
949941
* @param message The completion message
950942
*
951-
* Requirements: 1.1, 2.1, 2.2
952943
*/
953944
override fun onTaskCompleted(message: String) {
954945
runOnUiThread {
@@ -967,7 +958,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
967958
*
968959
* @param error The error message
969960
*
970-
* Requirements: 1.1, 2.1, 2.2
971961
*/
972962
override fun onTaskFailed(error: String) {
973963
runOnUiThread {
@@ -983,7 +973,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
983973
*
984974
* Note: Floating window hide is handled by ScreenshotService.
985975
*
986-
* Requirements: 1.1, 2.1, 2.2
987976
*/
988977
override fun onScreenshotStarted() {
989978
// Floating window hide is handled by ScreenshotService
@@ -994,7 +983,6 @@ class MainActivity : AppCompatActivity(), PhoneAgentListener {
994983
*
995984
* Note: Floating window show is handled by ScreenshotService.
996985
*
997-
* Requirements: 1.1, 2.1, 2.2
998986
*/
999987
override fun onScreenshotCompleted() {
1000988
// Floating window show is handled by ScreenshotService

app/src/main/java/com/kevinluo/autoglm/action/ActionHandler.kt

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import kotlinx.coroutines.delay
2424
* @param textInputManager Manager for text input operations
2525
* @param floatingWindowProvider Optional provider for floating window controller
2626
*
27-
* Requirements: 1.1, 1.7, 2.1, 2.2
2827
*/
2928
class ActionHandler(
3029
private val deviceExecutor: DeviceExecutor,
@@ -109,7 +108,6 @@ class ActionHandler(
109108
* @param screenHeight Current screen height in pixels
110109
* @return The result of the action execution
111110
*
112-
* Requirements: 1.7
113111
*/
114112
suspend fun execute(
115113
action: AgentAction,
@@ -160,7 +158,6 @@ class ActionHandler(
160158
* Executes a Tap action.
161159
* Hides floating window before tap to prevent touch interception.
162160
* Uses try-finally to ensure floating window is restored even if tap fails.
163-
* Requirements: 5.1, 5.2
164161
*/
165162
private suspend fun executeTap(
166163
action: AgentAction.Tap,
@@ -200,7 +197,6 @@ class ActionHandler(
200197
* Executes a Swipe action.
201198
* Hides floating window before swipe to prevent touch interception.
202199
* Uses try-finally to ensure floating window is restored even if swipe fails.
203-
* Requirements: 5.3, 5.4, 10.1-10.5
204200
*/
205201
private suspend fun executeSwipe(
206202
action: AgentAction.Swipe,
@@ -255,7 +251,6 @@ class ActionHandler(
255251
* 3. The BroadcastReceiver is registered
256252
*
257253
* Uses try-finally to ensure floating window is restored even if typing fails.
258-
* Requirements: 5.5
259254
*/
260255
private suspend fun executeType(action: AgentAction.Type): ActionResult {
261256
// Hide floating window to ensure target app has focus
@@ -276,7 +271,6 @@ class ActionHandler(
276271
/**
277272
* Executes a TypeName action (same as Type).
278273
* Uses try-finally to ensure floating window is restored even if typing fails.
279-
* Requirements: 5.6
280274
*/
281275
private suspend fun executeTypeName(action: AgentAction.TypeName): ActionResult {
282276
// Hide floating window to ensure target app has focus
@@ -300,7 +294,6 @@ class ActionHandler(
300294
* If not found, returns success with a message instructing the model
301295
* to find the app icon on screen (home screen or app drawer).
302296
*
303-
* Requirements: 5.7, 5.8
304297
*/
305298
private suspend fun executeLaunch(action: AgentAction.Launch): ActionResult {
306299
Logger.d(TAG, "Launching app: ${action.app}")
@@ -381,7 +374,6 @@ class ActionHandler(
381374
* Executes a Back action.
382375
* First dismisses the soft keyboard (if shown) to ensure the back action
383376
* actually navigates back instead of just closing the keyboard.
384-
* Requirements: 5.9
385377
*/
386378
private suspend fun executeBack(): ActionResult {
387379
// First, dismiss keyboard with ESCAPE key to ensure Back actually navigates
@@ -401,7 +393,6 @@ class ActionHandler(
401393

402394
/**
403395
* Executes a Home action.
404-
* Requirements: 5.10
405396
*/
406397
private suspend fun executeHome(): ActionResult {
407398
val result = deviceExecutor.pressKey(DeviceExecutor.KEYCODE_HOME)
@@ -415,7 +406,6 @@ class ActionHandler(
415406

416407
/**
417408
* Executes a VolumeUp action.
418-
* Requirements: 5.11
419409
*/
420410
private suspend fun executeVolumeUp(): ActionResult {
421411
val result = deviceExecutor.pressKey(DeviceExecutor.KEYCODE_VOLUME_UP)
@@ -429,7 +419,6 @@ class ActionHandler(
429419

430420
/**
431421
* Executes a VolumeDown action.
432-
* Requirements: 5.12
433422
*/
434423
private suspend fun executeVolumeDown(): ActionResult {
435424
val result = deviceExecutor.pressKey(DeviceExecutor.KEYCODE_VOLUME_DOWN)
@@ -443,7 +432,6 @@ class ActionHandler(
443432

444433
/**
445434
* Executes a Power action.
446-
* Requirements: 5.13
447435
*/
448436
private suspend fun executePower(): ActionResult {
449437
val result = deviceExecutor.pressKey(DeviceExecutor.KEYCODE_POWER)
@@ -460,7 +448,6 @@ class ActionHandler(
460448
* Executes a LongPress action.
461449
* Hides floating window before long press to prevent touch interception.
462450
* Uses try-finally to ensure floating window is restored even if long press fails.
463-
* Requirements: 5.14
464451
*/
465452
private suspend fun executeLongPress(
466453
action: AgentAction.LongPress,
@@ -497,7 +484,6 @@ class ActionHandler(
497484
* Executes a DoubleTap action.
498485
* Hides floating window before double tap to prevent touch interception.
499486
* Uses try-finally to ensure floating window is restored even if double tap fails.
500-
* Requirements: 5.15
501487
*/
502488
private suspend fun executeDoubleTap(
503489
action: AgentAction.DoubleTap,
@@ -527,7 +513,6 @@ class ActionHandler(
527513

528514
/**
529515
* Executes a Wait action.
530-
* Requirements: 5.16
531516
*/
532517
private suspend fun executeWait(action: AgentAction.Wait): ActionResult {
533518
val durationMs = (action.durationSeconds * 1000).toLong()
@@ -537,7 +522,6 @@ class ActionHandler(
537522

538523
/**
539524
* Executes a TakeOver action.
540-
* Requirements: 5.17
541525
*/
542526
private suspend fun executeTakeOver(action: AgentAction.TakeOver): ActionResult {
543527
confirmationCallback?.onTakeOverRequested(action.message)
@@ -546,7 +530,6 @@ class ActionHandler(
546530

547531
/**
548532
* Executes an Interact action.
549-
* Requirements: 5.18
550533
*/
551534
private suspend fun executeInteract(action: AgentAction.Interact): ActionResult {
552535
val selectedIndex = confirmationCallback?.onInteractionRequired(action.options) ?: -1
@@ -560,7 +543,6 @@ class ActionHandler(
560543

561544
/**
562545
* Executes a Note action.
563-
* Requirements: 5.19
564546
*/
565547
private suspend fun executeNote(action: AgentAction.Note): ActionResult {
566548
// Note action just records the message, no device operation needed
@@ -569,7 +551,6 @@ class ActionHandler(
569551

570552
/**
571553
* Executes a CallApi action.
572-
* Requirements: 5.20
573554
*/
574555
private suspend fun executeCallApi(action: AgentAction.CallApi): ActionResult {
575556
// CallApi action is handled by the agent layer, not device operations
@@ -578,7 +559,6 @@ class ActionHandler(
578559

579560
/**
580561
* Executes a Finish action.
581-
* Requirements: 5.21
582562
*/
583563
private suspend fun executeFinish(action: AgentAction.Finish): ActionResult {
584564
return ActionResult(true, true, action.message)

app/src/main/java/com/kevinluo/autoglm/action/ActionParser.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import com.kevinluo.autoglm.util.Logger
1313
* and system key actions. It validates coordinate ranges and provides detailed
1414
* error messages for parsing failures.
1515
*
16-
* Requirements: 1.1, 2.1, 2.2
1716
*/
1817
object ActionParser {
1918

app/src/main/java/com/kevinluo/autoglm/action/AgentAction.kt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ package com.kevinluo.autoglm.action
1010
* The sealed class pattern ensures exhaustive handling of all action types
1111
* and provides type-safe action processing.
1212
*
13-
* Requirements: 2.1, 7.2
1413
*/
1514
sealed class AgentAction {
1615

0 commit comments

Comments
 (0)