harrier-oss-v1-27b / mteb_v2_eval_prompts.json

Upload model

0c0fc62 verified 2 months ago

11.9 kB

	{
	"AILAStatutes-query": "Identifying the most relevant statutes for a given situation",
	"AfriSentiClassification": "Given a text, categorized by sentiment into positive, negative, or neutral",
	"AlloProfClusteringS2S.v2": "Identify the topic of document titles from Allo Prof dataset",
	"AlloprofReranking-query": "Given a question, retrieve passages that answer the question",
	"AmazonCounterfactualClassification": "Given an Amazon review, judge whether it is counterfactual.",
	"ArXivHierarchicalClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
	"ArXivHierarchicalClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
	"ArguAna-query": "Given a claim, find documents that refute the claim",
	"ArmenianParaphrasePC": "Retrieve semantically similar text",
	"BUCC.v2": "Retrieve parallel sentences",
	"BelebeleRetrieval-query": "Retrieval the relevant passage for the given query",
	"BibleNLPBitextMining": "Retrieve parallel sentences",
	"BigPatentClustering.v2": "Identify the category of documents from the Big Patent dataset",
	"BiorxivClusteringP2P.v2": "Identify the main category of Biorxiv papers based on the titles and abstracts",
	"BornholmBitextMining": "Retrieve parallel sentences",
	"BrazilianToxicTweetsClassification": "Classify the toxic tweets in Brazilian Portuguese into one of the six categories: LGBTQ+phobia, Xenophobia, Obscene, Insult, Misogyny and Racism.",
	"BulgarianStoreReviewSentimentClassfication": "Classify user reviews into positive, negative or mixed sentiment",
	"CEDRClassification": "Given a comment as query, classify expressed emotions into joy, sadness, surprise, fear, and anger",
	"CLSClusteringP2P.v2": "Identify the main category of scholar papers based on the titles and abstracts",
	"CSFDSKMovieReviewSentimentClassification": "Given a movie review, classify its rating on a scale from 0 to 5",
	"CTKFactsNLI": "Retrieve semantically similar text",
	"CataloniaTweetClassification": "Given a tweet, classify its sentiment into AGAINST, FAVOR or NEUTRAL towards Catalonia's independence.",
	"Core17InstructionRetrieval-query": "Retrieve relevant passages for the given query with conditions",
	"CovidRetrieval-query": "Given a question on COVID-19, retrieve news articles that answer the question",
	"CyrillicTurkicLangClassification": "Given a text, classify its language",
	"CzechProductReviewSentimentClassification": "Classify product reviews into positive, neutral, or negative sentiment",
	"DBpediaClassification": "Given the following text, retrieve the appropriate DBpedia category including Company, EducationalInstitution, Artist, Athlete, OfficeHolder, MeanOfTransportation, Building, NaturalPlace, Village, Animal, Plant, Album, Film, WrittenWork.",
	"DalajClassification": "Classify texts based on linguistic acceptability in Swedish",
	"DiaBlaBitextMining": "Retrieve parallel sentences",
	"EstonianValenceClassification": "Given a news article, categorized by sentiment into negatiivne, positiivne, neutraalne or vastuolulin",
	"FaroeseSTS": "Retrieve semantically similar text",
	"FilipinoShopeeReviewsClassification": "Given a shop review, classify its rating on a scale from 1 to 5",
	"FinParaSTS": "Retrieve semantically similar text",
	"FinancialPhrasebankClassification": "Given financial news, categorized by sentiment into positive, negative, or neutral",
	"FloresBitextMining": "Retrieve parallel sentences",
	"GermanSTSBenchmark": "Retrieve semantically similar text",
	"GreekLegalCodeClassification": "Given a greek legal text, classify its topic",
	"GujaratiNewsClassification": "Given a Gujarati news articles, classify ist topic",
	"HALClusteringS2S.v2": "Identify the topic of titles from HAL",
	"HagridRetrieval-query": "Given a question, retrieve relevant responses",
	"IN22GenBitextMining": "Retrieve parallel sentences",
	"IndicCrosslingualSTS": "Retrieve semantically similar text",
	"IndicGenBenchFloresBitextMining": "Retrieve parallel sentences",
	"IndicLangClassification": "Given a text, classify its language",
	"IndonesianIdClickbaitClassification": "Given an Indonesian news headlines, classify its into clickbait or non-clickbait",
	"IsiZuluNewsClassification": "Given a news article, classify its topic",
	"ItaCaseholdClassification": "Given a judgments, classify its topic",
	"JSICK": "Retrieve semantically similar text",
	"KorHateSpeechMLClassification": "Given a Korean online news comments, classify its fine-grained hate speech classes",
	"KorSarcasmClassification": "Given a twitter, categorized it into sarcasm or not_sarcasm",
	"KurdishSentimentClassification": "Given a text, categorized by sentiment into positive or negative",
	"LEMBPasskeyRetrieval-query": "Retrieval the relevant passage for the given query",
	"LegalBenchCorporateLobbying-query": "Given a query, retrieve relevant legal bill summaries",
	"MIRACLRetrievalHardNegatives-query": "Retrieve Wikipedia passages that answer the question",
	"MLQARetrieval-query": "Retrieval the relevant passage for the given query",
	"MacedonianTweetSentimentClassification": "Given a Macedonian tweet, categorized by sentiment into positive, negative, or neutral",
	"MalteseNewsClassification": "Given a maltese new, classify its topic",
	"MasakhaNEWSClassification": "Classify the News in the given texts into one of the seven category: politics,sports,health,business,entertainment,technology,religion ",
	"MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
	"MassiveIntentClassification": "Given a user utterance as query, find the user intents",
	"MedrxivClusteringP2P.v2": "Identify the main category of Medrxiv papers based on the titles and abstracts",
	"MultiEURLEXMultilabelClassification": "Given a text, classify its topic",
	"MultiHateClassification": "Given a text, categorized by sentiment into hate or non-hate",
	"NTREXBitextMining": "Retrieve parallel sentences",
	"NepaliNewsClassification": "Given a news article, categorized it into business, entertainment or sports",
	"News21InstructionRetrieval-query": "Retrieve relevant passages for the given query with conditions",
	"NollySentiBitextMining": "Retrieve parallel sentences",
	"NordicLangClassification": "Given a text in a Nordic language, classify the language into one of the following categories: Danish, Swedish, Norwegian (Nynorsk), Norwegian (Bokmål), Faroese, Icelandic.",
	"NorwegianCourtsBitextMining": "Retrieve parallel sentences",
	"NusaParagraphEmotionClassification": "Classify the emotion into one of the following categories: fear, sadness, anger, happy, love, surprise, shame.",
	"NusaTranslationBitextMining": "Retrieve parallel sentences",
	"NusaX-senti": "Given a text, categorized by sentiment into positive or negative",
	"NusaXBitextMining": "Retrieve parallel sentences",
	"OdiaNewsClassification": "Given a news article, categorized it into business, entertainment or sports",
	"OpusparcusPC": "Retrieve semantically similar text",
	"PAC": "Classify Polish contract clauses into one of the following two types: \"Safe Contract Clauses\" and \"Unfair Contract Clauses\".",
	"PawsXPairClassification": "Retrieve semantically similar text",
	"PlscClusteringP2P.v2": "Identify the category of titles+abstracts from Library of Science",
	"PoemSentimentClassification": "Given the following verse from a poem, classify its sentiment as negative, neutral, positive, or mixed.",
	"PolEmo2.0-OUT": "Classify the sentiment of products and school online reviews",
	"PpcPC": "Retrieve semantically similar text",
	"PunjabiNewsClassification": "Given a news article, categorized it into two-classes",
	"RTE3": "Retrieve semantically similar text",
	"Robust04InstructionRetrieval-query": "Retrieve relevant passages for the given query with conditions",
	"RomaniBibleClustering": "Identify verses from the Bible in Kalderash Romani by book.",
	"RuBQReranking-query": "Given a question, retrieve Wikipedia passages that answer the question",
	"SCIDOCS-query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
	"SIB200ClusteringS2S": "Identify the category of documents",
	"SICK-R": "Retrieve semantically similar text",
	"STS12": "Retrieve semantically related sentences",
	"STS13": "Retrieve semantically similar text",
	"STS14": "Retrieve semantically similar text",
	"STS15": "Retrieve semantically similar text",
	"STS17": "Retrieve semantically similar text",
	"STS22.v2": "Given a document, retrieve semantically related documents",
	"STSB": "Retrieve semantically similar text",
	"STSBenchmark": "Retrieve semantically similar text",
	"STSES": "Given a Spanish sentence, retrieve semantically related Spanish sentences",
	"ScalaClassification": "Classify passages into correct or correct in Scandinavian Languages based on linguistic acceptability",
	"SemRel24STS": "Retrieve semantically similar text",
	"SentimentAnalysisHindi": "Given a hindi text, categorized by sentiment into positive, negative or neutral",
	"SinhalaNewsClassification": "Given a news article, categorized it into political, business, technology, sports and Entertainment",
	"SiswatiNewsClassification": "Identify fine-grained news categories in Siswati language.",
	"SlovakMovieReviewSentimentClassification": "Given a movie review, categorized it into positive or negative",
	"SpartQA-query": "Given the following spatial reasoning question, retrieve the right answer.",
	"SprintDuplicateQuestions": "Find questions that have the same meaning as the input question",
	"StackExchangeClustering.v2": "Identify the topic or theme of StackExchange posts based on the titles",
	"StackOverflowQA-query": "Given a question about coding, retrieval code or passage that can solve user's question",
	"StatcanDialogueDatasetRetrieval-query": "Retrieval the relevant passage for the given query",
	"SwahiliNewsClassification": "Given a news article, classify its domain",
	"SwednClusteringP2P": "Identify news categories in Swedish passages",
	"SwissJudgementClassification": "Given a news article, categorized it into approval or dismissal",
	"T2Reranking-query": "Given a Chinese search query, retrieve web passages that answer the question",
	"TERRa": "Given a premise, retrieve a hypothesis that is entailed by the premise",
	"TRECCOVID-query": "Given a medical query, retrieve documents that answer the query",
	"Tatoeba": "Retrieve parallel sentences",
	"TempReasonL1-query": "Given the following question about time, retrieve the correct answer.",
	"ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic",
	"TswanaNewsClassification": "Given a news article, classify its topic",
	"TweetTopicSingleClassification": "Gvien a twitter, classify its topic",
	"TwitterHjerneRetrieval-query": "Retrieve answers to questions asked in Danish tweets",
	"TwitterURLCorpus": "Find tweets that have the same meaning as the input tweet",
	"VoyageMMarcoReranking-query": "Given a Japanese search query, retrieve web passages that answer the question",
	"WebLINXCandidatesReranking-query": "Retrieval the relevant passage for the given query",
	"WikiCitiesClustering": "Identify of Wikipedia articles of cities by country",
	"WikiClusteringP2P.v2": "Identify the category of wiki passages",
	"WikipediaRerankingMultilingual-query": "Retrieval the relevant passage for the given query",
	"WikipediaRetrievalMultilingual-query": "Retrieval the relevant passage for the given query",
	"WinoGrande-query": "Given the following sentence, retrieve an appropriate answer to fill in the missing underscored part.",
	"XNLI": "Retrieve semantically similar text",
	"indonli": "Retrieve semantically similar text"
	}