{"openapi":"3.1.0","info":{"title":"FishAudio OpenAPI","version":"1"},"paths":{"/wallet/{user_id}/package":{"get":{"summary":"Get User Package","security":[{"BearerAuth":[]}],"parameters":[{"in":"path","name":"user_id","description":"User ID or 'self'","required":false,"schema":{"default":"self","title":"User Id","type":"string"},"deprecated":false}],"responses":{"200":{"description":"Request fulfilled, document follows","headers":{},"content":{"application/json":{"schema":{"properties":{"user_id":{"title":"User Id","type":"string"},"type":{"title":"Type","type":"string"},"total":{"title":"Total","type":"integer"},"balance":{"title":"Balance","type":"integer"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"updated_at":{"format":"date-time","title":"Updated At","type":"string"},"finished_at":{"format":"date-time","title":"Finished At","type":"string"},"stripe_subscription_id":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Stripe Subscription Id"},"stripe_price_id":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Stripe Price Id"},"billing_period":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Billing Period"},"current_period_end":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"default":null,"title":"Current Period End"},"cancel_at_period_end":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":null,"title":"Cancel At Period End"},"cancel_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"default":null,"title":"Cancel At"},"scheduled_change":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"default":null,"title":"Scheduled Change"},"last_synced_at":{"anyOf":[{"format":"date-time","type":"string"},{"type":"null"}],"default":null,"title":"Last Synced At"},"extra_balance":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":0,"title":"Extra Balance"},"has_used_trial":{"default":false,"title":"Has Used 
Trial","type":"boolean"}},"required":["user_id","type","total","balance","created_at","updated_at","finished_at"],"type":"object"}}}},"401":{"description":"No permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Wallet"]}},"/wallet/{user_id}/api-credit":{"get":{"summary":"Get API Credit","security":[{"BearerAuth":[]}],"parameters":[{"in":"query","name":"check_free_credit","description":"","required":false,"schema":{"default":false,"title":"Check Free Credit","type":"boolean"},"deprecated":false},{"in":"query","name":"team_id","description":"","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Team Id"},"deprecated":false},{"in":"path","name":"user_id","description":"User ID or 'self'","required":false,"schema":{"default":"self","title":"User Id","type":"string"},"deprecated":false}],"responses":{"200":{"description":"Request fulfilled, document follows","headers":{},"content":{"application/json":{"schema":{"properties":{"_id":{"title":"Id","type":"string"},"user_id":{"title":"User Id","type":"string"},"credit":{"title":"Credit","type":"string"},"created_at":{"format":"date-time","title":"Created 
At","type":"string"},"updated_at":{"format":"date-time","title":"Updated At","type":"string"},"has_phone_sha256":{"title":"Has Phone Sha256","type":"boolean"},"has_free_credit":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":null,"title":"Has Free Credit"}},"required":["_id","user_id","credit","created_at","updated_at","has_phone_sha256"],"type":"object"}}}},"401":{"description":"No permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Wallet"]}},"/model":{"get":{"summary":"List Models","security":[{"BearerAuth":[]}],"parameters":[{"in":"query","name":"page_size","description":"Page size","required":false,"schema":{"default":10,"minimum":1,"title":"Page Size","type":"integer"},"deprecated":false},{"in":"query","name":"page_number","description":"Page number","required":false,"schema":{"default":1,"minimum":1,"title":"Page Number","type":"integer"},"deprecated":false},{"in":"query","name":"title","description":"Title to filter models","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Title"},"deprecated":false},{"in":"query","name":"tag","description":"Tag to filter 
models","required":false,"schema":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"},{"type":"null"}],"default":null,"title":"Tag"},"deprecated":false},{"in":"query","name":"self","description":"If True, only models created by the user will be returned","required":false,"schema":{"default":false,"title":"Self","type":"boolean"},"deprecated":false},{"in":"query","name":"author_id","description":"Author ID to filter models, this will be ignored if self is True","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Author Id"},"deprecated":false},{"in":"query","name":"language","description":"Language to filter models","required":false,"schema":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"},{"type":"null"}],"default":null,"title":"Language"},"deprecated":false},{"in":"query","name":"title_language","description":"Title language to filter models","required":false,"schema":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"},{"type":"null"}],"default":null,"title":"Title Language"},"deprecated":false},{"in":"query","name":"sort_by","description":"","required":false,"schema":{"default":"score","enum":["score","task_count","created_at"],"title":"Sort By","type":"string"},"deprecated":false}],"responses":{"200":{"description":"Request fulfilled, document follows","headers":{},"content":{"application/json":{"schema":{"properties":{"total":{"title":"Total","type":"integer"},"items":{"items":{"$ref":"#/components/schemas/ModelEntity"},"title":"Items","type":"array"}},"required":["total","items"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error 
message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Model"]},"post":{"summary":"Create Model for Users via API","security":[{"BearerAuth":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"properties":{"visibility":{"default":"public","description":"Model visibility: public will be shown in the discovery page, unlist allows anyone with the link to access, private will only be visible to the creator","enum":["public","unlist","private"],"title":"Visibility","type":"string"},"type":{"const":"tts","description":"Model type, tts is for text to speech","title":"Type","type":"string"},"title":{"description":"Model title or name","title":"Title","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Model description","title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"description":"Model cover image, this is required if the model is public","title":"Cover Image"},"train_mode":{"const":"fast","description":"Model train mode; for TTS models, fast means the model is instantly available after creation","title":"Train Mode","type":"string"},"voices":{"anyOf":[{"items":{"format":"binary","type":"string"},"type":"array"},{"format":"binary","type":"string"}],"description":"Upload voice files that will be used to tune the model","title":"Voices"},"texts":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Texts corresponding to the voices, if unspecified, ASR will be performed on the voices","title":"Texts"},"tags":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"description":"Model tags","title":"Tags"},"enhance_audio_quality":{"default":true,"description":"Enhance audio 
quality","title":"Enhance Audio Quality","type":"boolean"},"generate_sample":{"default":false,"description":"Generate default text","title":"Generate Sample","type":"boolean"}},"required":["type","title","train_mode","voices"],"type":"object"}},"application/x-www-form-urlencoded":{"schema":{"properties":{"visibility":{"default":"public","description":"Model visibility: public will be shown in the discovery page, unlist allows anyone with the link to access, private will only be visible to the creator","enum":["public","unlist","private"],"title":"Visibility","type":"string"},"type":{"const":"tts","description":"Model type, tts is for text to speech","title":"Type","type":"string"},"title":{"description":"Model title or name","title":"Title","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Model description","title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"description":"Model cover image, this is required if the model is public","title":"Cover Image"},"train_mode":{"const":"fast","description":"Model train mode; for TTS models, fast means the model is instantly available after creation","title":"Train Mode","type":"string"},"voices":{"anyOf":[{"items":{"format":"binary","type":"string"},"type":"array"},{"format":"binary","type":"string"}],"description":"Upload voice files that will be used to tune the model","title":"Voices"},"texts":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Texts corresponding to the voices, if unspecified, ASR will be performed on the voices","title":"Texts"},"tags":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"description":"Model tags","title":"Tags"},"enhance_audio_quality":{"default":true,"description":"Enhance audio quality","title":"Enhance Audio 
Quality","type":"boolean"},"generate_sample":{"default":false,"description":"Generate default text","title":"Generate Sample","type":"boolean"}},"required":["type","title","train_mode","voices"],"type":"object"}},"multipart/form-data":{"schema":{"properties":{"visibility":{"default":"public","description":"Model visibility: public will be shown in the discovery page, unlist allows anyone with the link to access, private will only be visible to the creator","enum":["public","unlist","private"],"title":"Visibility","type":"string"},"type":{"const":"tts","description":"Model type, tts is for text to speech","title":"Type","type":"string"},"title":{"description":"Model title or name","title":"Title","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Model description","title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"description":"Model cover image, this is required if the model is public","title":"Cover Image"},"train_mode":{"const":"fast","description":"Model train mode; for TTS models, fast means the model is instantly available after creation","title":"Train Mode","type":"string"},"voices":{"anyOf":[{"items":{"format":"binary","type":"string"},"type":"array"},{"format":"binary","type":"string"}],"description":"Upload voice files that will be used to tune the model","title":"Voices"},"texts":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Texts corresponding to the voices, if unspecified, ASR will be performed on the voices","title":"Texts"},"tags":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"description":"Model tags","title":"Tags"},"enhance_audio_quality":{"default":true,"description":"Enhance audio quality","title":"Enhance Audio Quality","type":"boolean"},"generate_sample":{"default":false,"description":"Generate default 
text","title":"Generate Sample","type":"boolean"}},"required":["type","title","train_mode","voices"],"type":"object"}},"application/msgpack":{"schema":{"properties":{"visibility":{"default":"public","description":"Model visibility: public will be shown in the discovery page, unlist allows anyone with the link to access, private will only be visible to the creator","enum":["public","unlist","private"],"title":"Visibility","type":"string"},"type":{"const":"tts","description":"Model type, tts is for text to speech","title":"Type","type":"string"},"title":{"description":"Model title or name","title":"Title","type":"string"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Model description","title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"description":"Model cover image, this is required if the model is public","title":"Cover Image"},"train_mode":{"const":"fast","description":"Model train mode; for TTS models, fast means the model is instantly available after creation","title":"Train Mode","type":"string"},"voices":{"anyOf":[{"items":{"format":"binary","type":"string"},"type":"array"},{"format":"binary","type":"string"}],"description":"Upload voice files that will be used to tune the model","title":"Voices"},"texts":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Texts corresponding to the voices, if unspecified, ASR will be performed on the voices","title":"Texts"},"tags":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"type":"null"}],"description":"Model tags","title":"Tags"},"enhance_audio_quality":{"default":true,"description":"Enhance audio quality","title":"Enhance Audio Quality","type":"boolean"},"generate_sample":{"default":false,"description":"Generate default text","title":"Generate 
Sample","type":"boolean"}},"required":["type","title","train_mode","voices"],"type":"object"}}}},"responses":{"201":{"description":"Document created, URL follows","headers":{},"content":{"application/json":{"schema":{"properties":{"_id":{"title":"Id","type":"string"},"type":{"enum":["svc","tts"],"title":"Type","type":"string"},"title":{"title":"Title","type":"string"},"description":{"title":"Description","type":"string"},"cover_image":{"title":"Cover Image","type":"string"},"train_mode":{"default":"full","enum":["fast","full"],"title":"Train Mode","type":"string"},"state":{"enum":["created","training","trained","failed"],"title":"State","type":"string"},"tags":{"items":{"type":"string"},"title":"Tags","type":"array"},"samples":{"default":[],"items":{"$ref":"#/components/schemas/SampleEntity"},"title":"Samples","type":"array"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"updated_at":{"format":"date-time","title":"Updated At","type":"string"},"languages":{"default":[],"items":{"type":"string"},"title":"Languages","type":"array"},"visibility":{"enum":["public","unlist","private"],"title":"Visibility","type":"string"},"lock_visibility":{"default":false,"title":"Lock Visibility","type":"boolean"},"dmca_taken_down":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Dmca Taken Down"},"default_text":{"default":"","title":"Default Text","type":"string"},"quality":{"anyOf":[{"$ref":"#/components/schemas/ModelQualityEntity"},{"type":"null"}],"default":null},"like_count":{"title":"Like Count","type":"integer"},"mark_count":{"title":"Mark Count","type":"integer"},"shared_count":{"title":"Shared Count","type":"integer"},"task_count":{"title":"Task 
Count","type":"integer"},"unliked":{"default":false,"title":"Unliked","type":"boolean"},"liked":{"default":false,"title":"Liked","type":"boolean"},"marked":{"default":false,"title":"Marked","type":"boolean"},"author":{"$ref":"#/components/schemas/AuthorEntity"}},"required":["_id","type","title","description","cover_image","state","tags","created_at","updated_at","visibility","like_count","mark_count","shared_count","task_count","author"],"type":"object"}}}},"401":{"description":"No permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Model"]}},"/model/{id}":{"get":{"summary":"Get Model","security":[{"BearerAuth":[]}],"parameters":[{"in":"path","name":"id","description":"","required":true,"schema":{"title":"Id","type":"string"},"deprecated":false}],"responses":{"200":{"description":"Request fulfilled, document follows","headers":{},"content":{"application/json":{"schema":{"properties":{"_id":{"title":"Id","type":"string"},"type":{"enum":["svc","tts"],"title":"Type","type":"string"},"title":{"title":"Title","type":"string"},"description":{"title":"Description","type":"string"},"cover_image":{"title":"Cover Image","type":"string"},"train_mode":{"default":"full","enum":["fast","full"],"title":"Train 
Mode","type":"string"},"state":{"enum":["created","training","trained","failed"],"title":"State","type":"string"},"tags":{"items":{"type":"string"},"title":"Tags","type":"array"},"samples":{"default":[],"items":{"$ref":"#/components/schemas/SampleEntity"},"title":"Samples","type":"array"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"updated_at":{"format":"date-time","title":"Updated At","type":"string"},"languages":{"default":[],"items":{"type":"string"},"title":"Languages","type":"array"},"visibility":{"enum":["public","unlist","private"],"title":"Visibility","type":"string"},"lock_visibility":{"default":false,"title":"Lock Visibility","type":"boolean"},"dmca_taken_down":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Dmca Taken Down"},"default_text":{"default":"","title":"Default Text","type":"string"},"quality":{"anyOf":[{"$ref":"#/components/schemas/ModelQualityEntity"},{"type":"null"}],"default":null},"like_count":{"title":"Like Count","type":"integer"},"mark_count":{"title":"Mark Count","type":"integer"},"shared_count":{"title":"Shared Count","type":"integer"},"task_count":{"title":"Task Count","type":"integer"},"unliked":{"default":false,"title":"Unliked","type":"boolean"},"liked":{"default":false,"title":"Liked","type":"boolean"},"marked":{"default":false,"title":"Marked","type":"boolean"},"author":{"$ref":"#/components/schemas/AuthorEntity"}},"required":["_id","type","title","description","cover_image","state","tags","created_at","updated_at","visibility","like_count","mark_count","shared_count","task_count","author"],"type":"object"}}}},"404":{"description":"Nothing matches the given 
URI","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Model"]},"patch":{"summary":"Update Model","security":[{"BearerAuth":[]}],"parameters":[{"in":"path","name":"id","description":"","required":true,"schema":{"title":"Id","type":"string"},"deprecated":false}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"properties":{"title":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Title"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"title":"Cover Image"},"visibility":{"anyOf":[{"enum":["public","unlist","private"],"type":"string"},{"type":"null"}],"default":null,"title":"Visibility"},"tags":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Tags"}},"type":"object"}},"application/x-www-form-urlencoded":{"schema":{"properties":{"title":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Title"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"title":"Cover 
Image"},"visibility":{"anyOf":[{"enum":["public","unlist","private"],"type":"string"},{"type":"null"}],"default":null,"title":"Visibility"},"tags":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Tags"}},"type":"object"}},"multipart/form-data":{"schema":{"properties":{"title":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Title"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"title":"Cover Image"},"visibility":{"anyOf":[{"enum":["public","unlist","private"],"type":"string"},{"type":"null"}],"default":null,"title":"Visibility"},"tags":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Tags"}},"type":"object"}},"application/msgpack":{"schema":{"properties":{"title":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Title"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Description"},"cover_image":{"anyOf":[{"format":"binary","type":"string"},{"type":"null"}],"default":null,"title":"Cover Image"},"visibility":{"anyOf":[{"enum":["public","unlist","private"],"type":"string"},{"type":"null"}],"default":null,"title":"Visibility"},"tags":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"string"}],"title":"Tags"}},"type":"object"}}}},"responses":{"200":{"description":"Request fulfilled, document follows","headers":{}},"401":{"description":"No permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error 
field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Model"]},"delete":{"summary":"Delete Model","security":[{"BearerAuth":[]}],"parameters":[{"in":"path","name":"id","description":"","required":true,"schema":{"title":"Id","type":"string"},"deprecated":false}],"responses":{"200":{"description":"Request fulfilled, document follows","headers":{}},"401":{"description":"No permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["Model"]}},"/v1/tts":{"post":{"summary":"Text to Speech","security":[{"BearerAuth":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TTSRequest"}},"application/msgpack":{"schema":{"$ref":"#/components/schemas/TTSRequest"}}}},"responses":{"200":{"description":"Request fulfilled, document follows","headers":{"Transfer-Encoding":{"schema":{"type":"string"},"description":"chunked"}}},"401":{"description":"No 
permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"402":{"description":"No payment -- see charging schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["OpenAPI v1"],"parameters":[{"in":"header","name":"model","description":"Specify which TTS model to use. We recommend `s2-pro`.","required":true,"schema":{"type":"string","default":"s2-pro","enum":["s1","s2-pro"]}}],"x-codeSamples":[{"lang":"bash","label":"Single Speaker","source":"curl --request POST \\\n  --url https://api.fish.audio/v1/tts \\\n  --header 'Authorization: Bearer <token>' \\\n  --header 'Content-Type: application/json' \\\n  --header 'model: s2-pro' \\\n  --data '{\n    \"text\": \"Hello! 
Welcome to Fish Audio.\",\n    \"reference_id\": \"model-id\",\n    \"temperature\": 0.7,\n    \"top_p\": 0.7,\n    \"prosody\": {\n      \"speed\": 1,\n      \"volume\": 0,\n      \"normalize_loudness\": true\n    },\n    \"chunk_length\": 300,\n    \"normalize\": true,\n    \"format\": \"mp3\",\n    \"sample_rate\": 44100,\n    \"mp3_bitrate\": 128,\n    \"latency\": \"normal\",\n    \"max_new_tokens\": 1024,\n    \"repetition_penalty\": 1.2,\n    \"min_chunk_length\": 50,\n    \"condition_on_previous_chunks\": true,\n    \"early_stop_threshold\": 1\n  }'"},{"lang":"bash","label":"Multi Speaker (S2-Pro only)","source":"curl --request POST \\\n  --url https://api.fish.audio/v1/tts \\\n  --header 'Authorization: Bearer <token>' \\\n  --header 'Content-Type: application/json' \\\n  --header 'model: s2-pro' \\\n  --data '{\n    \"text\": \"<|speaker:0|>Hello!<|speaker:1|>Hi there!\",\n    \"reference_id\": [\"speaker-a-id\", \"speaker-b-id\"],\n    \"temperature\": 0.7,\n    \"top_p\": 0.7,\n    \"prosody\": {\n      \"speed\": 1,\n      \"volume\": 0,\n      \"normalize_loudness\": true\n    },\n    \"chunk_length\": 300,\n    \"normalize\": true,\n    \"format\": \"mp3\",\n    \"sample_rate\": 44100,\n    \"mp3_bitrate\": 128,\n    \"latency\": \"normal\",\n    \"max_new_tokens\": 1024,\n    \"repetition_penalty\": 1.2,\n    \"min_chunk_length\": 50,\n    \"condition_on_previous_chunks\": true,\n    \"early_stop_threshold\": 1\n  }'"}]}},"/v1/asr":{"post":{"summary":"Speech to Text","security":[{"BearerAuth":[]}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"properties":{"audio":{"description":"Audio to be converted to text","format":"binary","title":"Audio","type":"string"},"language":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Language to be used for the speech","title":"Language"},"ignore_timestamps":{"default":true,"description":"Whether to return precise timestamps in the text, this will increase the 
latency in audio shorter than 30 seconds","title":"Ignore Timestamps","type":"boolean"}},"required":["audio"],"type":"object"}},"application/msgpack":{"schema":{"properties":{"audio":{"description":"Audio to be converted to text","format":"binary","title":"Audio","type":"string"},"language":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"description":"Language to be used for the speech","title":"Language"},"ignore_timestamps":{"default":true,"description":"Whether to return precise timestamps in the text, this will increase the latency in audio shorter than 30 seconds","title":"Ignore Timestamps","type":"boolean"}},"required":["audio"],"type":"object"}}}},"responses":{"200":{"description":"Request fulfilled, document follows","headers":{},"content":{"application/json":{"schema":{"properties":{"text":{"title":"Text","type":"string"},"duration":{"description":"Duration of the audio in seconds","title":"Duration","type":"number"},"segments":{"items":{"$ref":"#/components/schemas/ASRSegment"},"title":"Segments","type":"array"}},"required":["text","duration","segments"],"type":"object"}}}},"401":{"description":"No permission -- see authorization schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"402":{"description":"No payment -- see charging schemes","headers":{},"content":{"application/json":{"schema":{"properties":{"status":{"title":"Status","type":"integer"},"message":{"title":"Message","type":"string"}},"required":["status","message"],"type":"object"}}}},"422":{"description":"","headers":{},"content":{"application/json":{"schema":{"type":"array","items":{"type":"object","properties":{"loc":{"title":"Location","description":"error field","type":"array","items":{"type":"string"}},"type":{"title":"Type","description":"error 
type","type":"string"},"msg":{"title":"Message","description":"error message","type":"string"},"ctx":{"title":"Context","description":"error context","type":"string"},"in":{"title":"In","type":"string","enum":["path","query","header","cookie","body"]}},"required":["loc","type","msg"]}}}}}},"tags":["OpenAPI v1"]}}},"tags":[],"components":{"securitySchemes":{"BearerAuth":{"type":"http","scheme":"bearer"}},"schemas":{"AuthorEntity":{"properties":{"_id":{"title":"Id","type":"string"},"nickname":{"title":"Nickname","type":"string"},"avatar":{"title":"Avatar","type":"string"}},"required":["_id","nickname","avatar"],"title":"AuthorEntity","type":"object"},"ModelAudioQualityEntity":{"properties":{"filename":{"title":"Filename","type":"string"},"duration_ms":{"title":"Duration Ms","type":"number"},"language":{"default":"unknown","title":"Language","type":"string"},"quality":{"additionalProperties":{"type":"number"},"title":"Quality","type":"object"},"quality_passed":{"default":false,"title":"Quality Passed","type":"boolean"},"quality_reason":{"default":"","title":"Quality Reason","type":"string"}},"required":["filename","duration_ms"],"title":"ModelAudioQualityEntity","type":"object"},"ModelEntity":{"properties":{"_id":{"title":"Id","type":"string"},"type":{"enum":["svc","tts"],"title":"Type","type":"string"},"title":{"title":"Title","type":"string"},"description":{"title":"Description","type":"string"},"cover_image":{"title":"Cover Image","type":"string"},"train_mode":{"default":"full","enum":["fast","full"],"title":"Train Mode","type":"string"},"state":{"enum":["created","training","trained","failed"],"title":"State","type":"string"},"tags":{"items":{"type":"string"},"title":"Tags","type":"array"},"samples":{"default":[],"items":{"$ref":"#/components/schemas/SampleEntity"},"title":"Samples","type":"array"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"updated_at":{"format":"date-time","title":"Updated 
At","type":"string"},"languages":{"default":[],"items":{"type":"string"},"title":"Languages","type":"array"},"visibility":{"enum":["public","unlist","private"],"title":"Visibility","type":"string"},"lock_visibility":{"default":false,"title":"Lock Visibility","type":"boolean"},"dmca_taken_down":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Dmca Taken Down"},"default_text":{"default":"","title":"Default Text","type":"string"},"quality":{"anyOf":[{"$ref":"#/components/schemas/ModelQualityEntity"},{"type":"null"}],"default":null},"like_count":{"title":"Like Count","type":"integer"},"mark_count":{"title":"Mark Count","type":"integer"},"shared_count":{"title":"Shared Count","type":"integer"},"task_count":{"title":"Task Count","type":"integer"},"unliked":{"default":false,"title":"Unliked","type":"boolean"},"liked":{"default":false,"title":"Liked","type":"boolean"},"marked":{"default":false,"title":"Marked","type":"boolean"},"author":{"$ref":"#/components/schemas/AuthorEntity"}},"required":["_id","type","title","description","cover_image","state","tags","created_at","updated_at","visibility","like_count","mark_count","shared_count","task_count","author"],"title":"ModelEntity","type":"object"},"ModelQualityEntity":{"properties":{"audios":{"items":{"$ref":"#/components/schemas/ModelAudioQualityEntity"},"title":"Audios","type":"array"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"updated_at":{"format":"date-time","title":"Updated At","type":"string"}},"required":["created_at","updated_at"],"title":"ModelQualityEntity","type":"object"},"SampleEntity":{"properties":{"title":{"title":"Title","type":"string"},"text":{"title":"Text","type":"string"},"task_id":{"title":"Task Id","type":"string"},"audio":{"title":"Audio","type":"string"}},"required":["title","text","task_id","audio"],"title":"SampleEntity","type":"object"},"ProsodyControl":{"description":"Controls for adjusting the prosody (rhythm and intonation) of generated 
speech.","properties":{"speed":{"default":1.0,"description":"Speaking rate multiplier. Valid range: 0.5 to 2.0. 1.0 = normal speed, 0.5 = half speed, 2.0 = double speed. Useful for adjusting pacing without regenerating audio.","title":"Speed","type":"number"},"volume":{"default":0.0,"description":"Volume adjustment in decibels (dB). 0 = no change, positive values = louder, negative values = quieter.","title":"Volume","type":"number"},"normalize_loudness":{"default":true,"description":"Normalize output loudness for more consistent perceived volume. **S2-Pro only.**","title":"Normalize Loudness","type":"boolean"}},"title":"ProsodyControl","type":"object"},"ReferenceAudio":{"description":"A voice sample with its transcript, used for zero-shot voice cloning. The model will attempt to match the voice characteristics from the audio sample.","properties":{"audio":{"description":"Raw audio bytes of the voice sample. Supported formats: WAV, MP3, FLAC. For best results, use 10-30 seconds of clear speech with minimal background noise.","format":"binary","title":"Audio","type":"string"},"text":{"description":"The exact transcript of what is spoken in the audio sample. Accuracy is important for voice cloning quality.","title":"Text","type":"string"}},"required":["audio","text"],"title":"ReferenceAudio","type":"object"},"TTSRequest":{"description":"Request body for text-to-speech synthesis. Supports single-speaker synthesis on all compatible TTS models. Multi-speaker dialogue synthesis is only available with the S2-Pro model.\n\n## Single Speaker\nProvide either `reference_id` (string) pointing to a voice model, or `references` (array of ReferenceAudio) for zero-shot cloning.\n\n## Multiple Speakers (Dialogue, S2-Pro only)\nFor multi-speaker synthesis, provide:\n- `reference_id`: array of voice model IDs, e.g., [\"speaker-0-id\", \"speaker-1-id\"]\n- `text`: use speaker tags `<|speaker:0|>`, `<|speaker:1|>`, etc. 
to indicate speaker changes, e.g., \"<|speaker:0|>Hello!<|speaker:1|>Hi there!\"\n\nAlternatively, for zero-shot multi-speaker:\n- `references`: 2D array where each inner array contains references for one speaker\n- `reference_id`: array of identifiers (can be arbitrary strings for zero-shot)\n\n## Example (Multi-Speaker with Model IDs)\n```json\n{\n  \"text\": \"<|speaker:0|>Good morning!<|speaker:1|>Good morning! How are you?<|speaker:0|>I'm great, thanks!\",\n  \"reference_id\": [\"model-id-alice\", \"model-id-bob\"]\n}\n```","properties":{"text":{"description":"Text to convert to speech.","title":"Text","type":"string"},"temperature":{"default":0.7,"description":"Controls expressiveness. Higher is more varied, lower is more consistent.","maximum":1.0,"minimum":0.0,"title":"Temperature","type":"number"},"top_p":{"default":0.7,"description":"Controls diversity via nucleus sampling.","maximum":1.0,"minimum":0.0,"title":"Top P","type":"number"},"references":{"anyOf":[{"description":"Single speaker: array of reference audio samples","items":{"$ref":"#/components/schemas/ReferenceAudio"},"type":"array"},{"description":"Multiple speakers: array of arrays, where each inner array contains reference samples for one speaker","items":{"items":{"$ref":"#/components/schemas/ReferenceAudio"},"type":"array"},"type":"array"},{"type":"null"}],"description":"Inline voice references for zero-shot cloning. Requires MessagePack (not JSON). For single speaker, provide an array of ReferenceAudio objects. For multiple speakers, provide an array of arrays where each inner array contains references for one speaker. **Multi-speaker is only available with the S2-Pro model.** The speaker index corresponds to the index in the `reference_id` array. 
Example for multi-speaker: [[{audio, text}], [{audio, text}, {audio, text}]] for 2 speakers where speaker 1 has 2 reference samples.","title":"References"},"reference_id":{"anyOf":[{"description":"Single speaker: voice model ID string","type":"string"},{"description":"Multiple speakers: array of voice model IDs, one per speaker","items":{"type":"string"},"type":"array"},{"type":"null"}],"default":null,"description":"Voice model ID(s) from Fish Audio library or your custom models. For single-speaker synthesis, provide a string. For multi-speaker synthesis (dialogue), provide an array of model IDs. **Multi-speaker is only available with the S2-Pro model.** When using multiple speakers, use speaker tags in your text like `<|speaker:0|>` and `<|speaker:1|>` to indicate speaker changes. Example: `<|speaker:0|>Hello!<|speaker:1|>Hi there!<|speaker:0|>How are you?` with `reference_id: [\"speaker-a-id\", \"speaker-b-id\"]`.","title":"Reference Id"},"prosody":{"anyOf":[{"$ref":"#/components/schemas/ProsodyControl"},{"type":"null"}],"default":null,"description":"Speed and volume adjustments for the output."},"chunk_length":{"default":300,"description":"Text segment size for processing.","maximum":300,"minimum":100,"title":"Chunk Length","type":"integer"},"normalize":{"default":true,"description":"Normalizes text for English and Chinese, improving stability for numbers.","title":"Normalize","type":"boolean"},"format":{"default":"mp3","description":"Output audio format.","enum":["wav","pcm","mp3","opus"],"title":"Format","type":"string"},"sample_rate":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"description":"Audio sample rate in Hz. When null, uses the format's default (44100 Hz for most formats, 48000 Hz for opus).","title":"Sample Rate"},"mp3_bitrate":{"default":128,"description":"MP3 bitrate in kbps. 
Only applies when format is mp3.","enum":[64,128,192],"title":"Mp3 Bitrate","type":"integer"},"opus_bitrate":{"default":-1000,"description":"Opus bitrate in kbps. -1000 for automatic. Only applies when format is opus.","enum":[-1000,24,32,48,64],"title":"Opus Bitrate","type":"integer"},"latency":{"default":"normal","description":"Latency-quality trade-off. normal: best quality, balanced: reduced latency, low: lowest latency.","enum":["low","normal","balanced"],"title":"Latency","type":"string"},"max_new_tokens":{"default":1024,"description":"Maximum audio tokens to generate per text chunk.","title":"Max New Tokens","type":"integer"},"repetition_penalty":{"default":1.2,"description":"Penalty for repeating audio patterns. Values above 1.0 reduce repetition.","title":"Repetition Penalty","type":"number"},"min_chunk_length":{"default":50,"description":"Minimum characters before splitting into a new chunk.","maximum":100,"minimum":0,"title":"Min Chunk Length","type":"integer"},"condition_on_previous_chunks":{"default":true,"description":"Use previous audio as context for voice consistency.","title":"Condition On Previous Chunks","type":"boolean"},"early_stop_threshold":{"default":1.0,"description":"Early stopping threshold for batch processing.","maximum":1.0,"minimum":0.0,"title":"Early Stop Threshold","type":"number"}},"required":["text"],"title":"TTSRequest","type":"object"},"ASRSegment":{"properties":{"text":{"title":"Text","type":"string"},"start":{"title":"Start","type":"number"},"end":{"title":"End","type":"number"}},"required":["text","start","end"],"title":"ASRSegment","type":"object"}}},"servers":[{"description":"Fish Audio API","url":"https://api.fish.audio"}]}