-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathstanfordCore.ts
More file actions
144 lines (128 loc) · 4.21 KB
/
stanfordCore.ts
File metadata and controls
144 lines (128 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import { task, configure, logger } from '@trigger.dev/sdk/v3';
import * as uuid from 'uuid';
import { uploadDocumentToRS } from './tasks/uploadDocumentToRS';
import { doStanfordNlp } from './tasks/doStandfordNlp';
import { nerToXML } from './tasks/nerToXML';
import { plainTextToXML } from './tasks/plainTextToXML';
import { createClient } from '@supabase/supabase-js';
import { Document } from '@recogito/studio-sdk';
import { xmlToPlainText } from './tasks/xmlToPlainText';
/**
 * Builds the user-facing notification strings for a document, keyed first by
 * message kind (`success`, `failure`, `goto`) and then by locale (`en`, `de`).
 *
 * NOTE: the `de` entries currently carry the same English text as `en`.
 *
 * @param docName - Document name interpolated into the success and failure messages.
 * @returns A freshly built lookup object of message kind -> locale -> text.
 */
const translations = (docName: string) => {
  const processedMessage = `Your document, ${docName}, has successfully been processed through NER and is now available in your project`;
  const failedMessage = `There was an error creating your document, ${docName}.`;
  const openProjectLabel = 'Go to your project';

  return {
    success: { en: processedMessage, de: processedMessage },
    failure: { en: failedMessage, de: failedMessage },
    goto: { en: openProjectLabel, de: openProjectLabel },
  };
};
/** Payload accepted by the `stanford-core` task. */
type StanfordCorePayload = {
  serverURL: string;
  projectId: string;
  documentId: string;
  nameOut: string;
  language: 'en' | 'de';
  outputLanguage: 'en' | 'de';
  key: string;
  token: string;
  userId: string;
};

/**
 * Trigger.dev task that runs a stored document through the NER pipeline and
 * uploads the annotated result as a new XML document.
 *
 * Steps, in order:
 *  1. Create a Supabase client for `serverURL`/`key`, sending `token` as a
 *     Bearer `Authorization` header.
 *  2. Read the `documents` row whose `id` is `documentId` and require its
 *     `content_type` to be `text/xml` or `text/plain`.
 *  3. Download `documentId` from the `documents` storage bucket.
 *  4. Obtain both a plain-text and an XML form of the content via the
 *     `plainTextToXML` or `xmlToPlainText` subtask, depending on content type.
 *  5. Run `doStanfordNlp` on the text, then `nerToXML` to produce the TEI output.
 *  6. Hand the TEI output to `uploadDocumentToRS` under a fresh UUID, named
 *     `${nameOut}.xml`, with notification texts in `outputLanguage`.
 *
 * @throws Error when the document row cannot be fetched, the content type is
 *   not supported, the download fails, or the upload subtask yields a falsy
 *   result.
 */
export const stanfordCore = task({
  id: 'stanford-core',
  run: async (payload: StanfordCorePayload) => {
    const {
      serverURL,
      projectId,
      documentId,
      nameOut,
      language,
      outputLanguage,
      key,
      token,
      userId,
    } = payload;

    logger.info('Creating Supabase client');
    const supabase = createClient(serverURL, key, {
      global: { headers: { Authorization: `Bearer ${token}` } },
    });
    // Without a client there is nothing to do; the task ends without error.
    if (!supabase) {
      return;
    }

    logger.info('Get Document data');
    const docResult = await supabase
      .from('documents')
      .select()
      .eq('id', documentId)
      .single();
    if (docResult.error) {
      logger.error(docResult.error.message);
      throw new Error('Failed to fetch document data');
    }

    const doc: Document = docResult.data as unknown as Document;
    const supportedTypes = ['text/xml', 'text/plain'];
    if (!supportedTypes.includes(doc.content_type || '')) {
      logger.error('Wrong content type');
      throw new Error('Wrong content type');
    }
    const isPlainText = doc.content_type === 'text/plain';

    logger.info('Downloading File');
    const { data: file, error: downloadError } = await supabase.storage
      .from('documents')
      .download(documentId);
    if (downloadError) {
      logger.error(downloadError.message);
      throw new Error('Failed to download document');
    }

    // Both branches start from the raw file body and end with a text/xml pair.
    const rawContent = await file.text();
    let text: string;
    let xml: string;
    if (isPlainText) {
      logger.info('Calling Plain Text to XML');
      ({ text, xml } = await plainTextToXML
        .triggerAndWait({ text: rawContent })
        .unwrap());
    } else {
      logger.info('Calling XML to Plain Text');
      ({ text, xml } = await xmlToPlainText
        .triggerAndWait({ xml: rawContent })
        .unwrap());
    }

    logger.info('Calling NLP NER');
    const nlpOutput = await doStanfordNlp
      .triggerAndWait({ data: text, language, outputLanguage })
      .unwrap();

    const teiOutput = await nerToXML
      .triggerAndWait({ nerData: nlpOutput.ner, text, originalXML: xml })
      .unwrap();

    const newDocumentId = uuid.v4();
    logger.info('Upload to Recogito Studio');

    // Notification texts are resolved once and picked by the output language.
    const messages = translations(nameOut);
    const uploaded = await uploadDocumentToRS
      .triggerAndWait({
        id: newDocumentId,
        documentData: teiOutput.tei,
        name: `${nameOut}.xml`,
        type: 'text/xml',
        projectId,
        documentId,
        key,
        token,
        supabaseURL: serverURL,
        userId,
        language: outputLanguage,
        successMessage: messages.success[outputLanguage],
        gotoMessage: messages.goto[outputLanguage],
      })
      .unwrap();

    if (!uploaded) {
      throw new Error('Failed to upload document');
    }
  },
});